diff --git a/.travis.yml b/.travis.yml
index 5dc901561e8..6dd5489ab7b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,16 +35,7 @@ git:
   depth: 250
 
 before_install:
-  # Common pre-install steps for all builds
-  - eval "${MATRIX_EVAL}"
-  - ulimit -c unlimited -S
-  - |
-    if [ $TRAVIS_OS_NAME == "linux" ]; then
-      sudo bash -c "echo -e 'Acquire::Retries 10; Acquire::http::Timeout \"20\";' > /etc/apt/apt.conf.d/99-travis-retry"
-      sudo apt-get update -qq
-    fi
-  - eval `python $TRAVIS_BUILD_DIR/ci/detect-changes.py`
-  - if [ $TRAVIS_OS_NAME == "linux" ]; then ccache -s; fi
+  - source ci/travis/before-install.sh
 
 matrix:
   fast_finish: true
@@ -62,28 +53,13 @@ matrix:
       - $TRAVIS_BUILD_DIR/ci/travis_release_test.sh
     - name: "C++ unit tests w/ clang 7.0, system packages"
       os: linux
-      env:
-      - ARROW_TRAVIS_USE_SYSTEM=1
-      - ARROW_TRAVIS_PLASMA=1
-      - ARROW_TRAVIS_ORC=1
-      - ARROW_TRAVIS_PARQUET=1
-      - ARROW_TRAVIS_GANDIVA=1
-      - ARROW_TRAVIS_VERBOSE=1
-      - ARROW_TRAVIS_USE_SYSTEM_JAVA=1
-      - ARROW_BUILD_WARNING_LEVEL=CHECKIN
-      - ARROW_USE_ASAN=1
-      - ARROW_USE_UBSAN=1
-      - CC="clang-7"
-      - CXX="clang++-7"
+      language: cpp
       before_script:
       - if [ $ARROW_CI_CPP_AFFECTED != "1" ]; then exit; fi
-      - $TRAVIS_BUILD_DIR/ci/travis_install_clang_tools.sh
-      - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh
-      # If either C++ or Python changed, we must install the C++ libraries
       - git submodule update --init
-      - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
+      - docker-compose build --pull cpp-system-deps
       script:
-      - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh || travis_terminate 1
+      - docker-compose run cpp-system-deps
     # Separating Valgrind and C++ coverage makes individual jobs shorter
     - name: "C++ unit tests w/ conda-forge toolchain, coverage"
       compiler: gcc
@@ -144,7 +120,7 @@ matrix:
       # Only run Plasma tests with valgrind in one of the Python builds because
       # they are slow
       - export PLASMA_VALGRIND=1
-      - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 || travis_terminate 1
+      - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
       - $TRAVIS_BUILD_DIR/ci/travis_upload_cpp_coverage.sh
     - name: "[OS X] C++ w/ XCode 9.3"
       compiler: clang
@@ -191,40 +167,14 @@ matrix:
       script:
       - if [ $ARROW_CI_PYTHON_AFFECTED != "1" ]; then exit; fi
       - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6
-    - name: "[manylinux1] Python"
+    - name: "Java OpenJDK8 and OpenJDK11"
       language: cpp
-      env:
-      - PYTHON_VERSIONS="3.6,16 3.7,16"
-      before_script:
-      - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then docker-compose pull python-manylinux1; fi
-      script:
-      - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_manylinux.sh; fi
-    - name: "Java w/ OpenJDK 8"
-      language: java
-      os: linux
-      jdk: openjdk8
-      before_script:
-      - if [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
-      - $TRAVIS_BUILD_DIR/ci/travis_install_linux.sh
-      script:
-      - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
-      - $TRAVIS_BUILD_DIR/ci/travis_script_javadoc.sh
-    - name: "Java w/ OpenJDK 9"
-      language: java
-      os: linux
-      jdk: openjdk9
-      before_script:
-      - if [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
-      script:
-      - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
-    - name: "Java w/ OpenJDK 11"
-      language: java
       os: linux
-      jdk: openjdk11
       before_script:
       - if [ $ARROW_CI_JAVA_AFFECTED != "1" ]; then exit; fi
+      - docker-compose build --pull java-all-jdks
       script:
-      - $TRAVIS_BUILD_DIR/ci/travis_script_java.sh
+      - docker-compose run java-all-jdks
     - name: "Integration w/ OpenJDK 8, conda-forge toolchain"
       language: java
       os: linux
@@ -325,6 +275,8 @@ matrix:
       after_success:
       - pushd ${TRAVIS_BUILD_DIR}/rust
       # Run coverage for codecov.io
+      - export ARROW_TEST_DATA=$TRAVIS_BUILD_DIR/testing/data
+      - export PARQUET_TEST_DATA=$TRAVIS_BUILD_DIR/cpp/submodules/parquet-testing/data
       - cargo tarpaulin --out Xml
       - bash <(curl -s https://codecov.io/bash) || echo "Codecov did not collect coverage reports"
     - name: Go
@@ -381,23 +333,10 @@ matrix:
       - Rscript ../ci/travis_upload_r_coverage.R
 
 after_failure:
-- |
-  if [ "$TRAVIS_OS_NAME" = "osx" ]; then
-    COREFILE=$(find /cores -maxdepth 1 -type f -name "core.*" | head -n 1)
-    if [[ -f "$COREFILE" ]]; then
-      lldb -c "$COREFILE" --batch --one-line "thread backtrace all -e true"
-    fi
-    ls -la ~/Library/Logs/DiagnosticReports/
-    cat ~/Library/Logs/DiagnosticReports/*.crash
-  else
-    COREFILE=$(find . -maxdepth 2 -name "core*" | head -n 1)
-    if [[ -f "$COREFILE" ]]; then
-      gdb -c "$COREFILE" example -ex "thread apply all bt" -ex "set pagination 0" -batch
-    fi
-  fi
+  - source ci/travis/after-failure.sh
 
 after_script:
-  - if [ $TRAVIS_OS_NAME == "linux" ]; then ccache -s; fi
+  - source ci/travis/after-script.sh
 
 env:
   global:
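The shared setup steps removed above now live in sourced scripts under `ci/travis/`, which this diff does not include. A plausible sketch of `ci/travis/before-install.sh`, assuming it simply carries over the deleted inline steps (the real script may differ):

```sh
#!/usr/bin/env bash
# Hypothetical reconstruction of ci/travis/before-install.sh, reassembled
# from the steps that were previously inlined in .travis.yml.

eval "${MATRIX_EVAL}"

# Allow core dumps so the after-failure script can extract backtraces.
ulimit -c unlimited -S

if [ "$TRAVIS_OS_NAME" == "linux" ]; then
  # Make flaky apt mirrors retry instead of failing the build.
  sudo bash -c "echo -e 'Acquire::Retries 10; Acquire::http::Timeout \"20\";' > /etc/apt/apt.conf.d/99-travis-retry"
  sudo apt-get update -qq
fi

# Export the ARROW_CI_*_AFFECTED variables used by the per-job guards.
eval `python $TRAVIS_BUILD_DIR/ci/detect-changes.py`

if [ "$TRAVIS_OS_NAME" == "linux" ]; then ccache -s; fi
```

Note also that the Rust coverage job gains `ARROW_TEST_DATA` and `PARQUET_TEST_DATA` exports, so data-dependent tests can locate their fixtures when `cargo tarpaulin` runs.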
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fdd35b25728..64def81751f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,63 @@
 under the License.
 -->
 
+# Apache Arrow 0.14.1 (16 July 2019)
+
+## Bug
+
+* ARROW-5775 - [C++] StructArray : cached boxed fields not thread-safe
+* ARROW-5790 - [Python] Passing zero-dim numpy array to pa.array causes segfault
+* ARROW-5791 - [Python] pyarrow.csv.read\_csv hangs + eats all RAM
+* ARROW-5816 - [Release] Parallel curl does not work reliably in verify-release-candidate-sh
+* ARROW-5836 - [Java][OSX] Flight tests are failing: address already in use
+* ARROW-5838 - [C++][Flight][OSX] Building 3rdparty grpc cannot find OpenSSL
+* ARROW-5849 - [C++] Compiler warnings on mingw-w64
+* ARROW-5851 - [C++] Compilation of reference benchmarks fails
+* ARROW-5856 - [Python] linking 3rd party cython modules against pyarrow fails since 0.14.0
+* ARROW-5863 - [Python] Segmentation Fault via pytest-runner
+* ARROW-5868 - [Python] manylinux2010 wheels have shared library dependency on liblz4
+* ARROW-5873 - [Python] Segmentation fault when comparing schema with None
+* ARROW-5874 - [Python] pyarrow 0.14.0 macOS wheels depend on shared libs under /usr/local/opt
+* ARROW-5878 - [Python][C++] Parquet reader not forward compatible for timestamps without timezone
+* ARROW-5886 - [Python][Packaging] Manylinux1/2010 compliance issue with libz
+* ARROW-5887 - [C#] ArrowStreamWriter writes FieldNodes in wrong order
+* ARROW-5889 - [Python][C++] Parquet backwards compat for timestamps without timezone broken
+* ARROW-5899 - [Python][Packaging] Bundle uriparser.dll in windows wheels
+* ARROW-5921 - [C++][Fuzzing] Missing nullptr checks in IPC
+* ARROW-5946 - [Rust] [DataFusion] Projection push down with aggregate producing incorrect results
+
+## Improvement
+
+* ARROW-5380 - [C++] Fix and enable UBSan for unaligned accesses.
+* ARROW-5564 - [C++] Add uriparser to conda-forge
+* ARROW-5609 - [C++] Set CMP0068 CMake policy to avoid macOS warnings
+* ARROW-5784 - [Release][GLib] Replace c\_glib/ after running c\_glib/autogen.sh in dev/release/02-source.sh
+* ARROW-5785 - [Rust] Rust datafusion implementation should not depend on rustyline
+* ARROW-5787 - [Release][Rust] Use local modules to verify RC
+* ARROW-5793 - [Release] Avoid duplicate known host SSH error in dev/release/03-binary.sh
+* ARROW-5794 - [Release] Skip uploading already uploaded binaries
+* ARROW-5795 - [Release] Add missing waits on uploading binaries
+* ARROW-5796 - [Release][APT] Update expected package list
+* ARROW-5797 - [Release][APT] Update supported distributions
+* ARROW-5820 - [Release] Remove undefined variable check from verify script
+* ARROW-5827 - [C++] Require c-ares CMake config
+* ARROW-5828 - [C++] Add Protocol Buffers version check
+* ARROW-5866 - [C++] Remove duplicate library in cpp/Brewfile
+* ARROW-5877 - [FlightRPC] Fix auth incompatibilities between Python/Java
+* ARROW-5904 - [Java] [Plasma] Fix compilation of Plasma Java client
+* ARROW-5908 - [C#] ArrowStreamWriter doesn't align buffers to 8 bytes
+* ARROW-5911 - [Java] Make ListVector and MapVector create reader lazily
+* ARROW-5937 - [Release] Stop parallel binary upload
+* ARROW-5938 - [Release] Create branch for adding release note automatically
+* ARROW-5939 - [Release] Add support for generating vote email template separately
+* ARROW-5940 - [Release] Add support for re-uploading sign/checksum for binary artifacts
+* ARROW-5941 - [Release] Avoid re-uploading already uploaded binary artifacts
+
+## Task
+
+* ARROW-5934 - [Python] Bundle arrow's LICENSE with the wheels
+* ARROW-5958 - [Python] Link zlib statically in the wheels
+
 # Apache Arrow 0.14.0 (29 June 2019)
 
 ## Bug
diff --git a/LICENSE.txt b/LICENSE.txt
index df5f5cce783..ad5a1ba4f43 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -572,6 +572,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 
 --------------------------------------------------------------------------------
+
 cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and
 cpp/src/arrow/util/logging-test.cc are adapted from
 Ray Project (https://github.com/ray-project/ray) (Apache 2.0).
@@ -893,7 +894,7 @@ Copyright (c) Copyright 2017 Asylo authors
 Homepage: https://asylo.dev/
 License: Apache 2.0
 
----------------------------------------------------------------------------------
+--------------------------------------------------------------------------------
 
 This project includes code from Google's protobuf project
 
@@ -936,6 +937,732 @@ support library is itself covered by the above license.
 
 --------------------------------------------------------------------------------
 
+3rdparty dependency gRPC is statically linked in certain binary
+distributions, like the python wheels. gRPC has the following license:
+
+Copyright 2014 gRPC authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ +-------------------------------------------------------------------------------- + +3rdparty dependency Apache Thrift is statically linked in certain binary +distributions, like the python wheels. Apache Thrift has the following license: + +Apache Thrift +Copyright (C) 2006 - 2019, The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache ORC is statically linked in certain binary +distributions, like the python wheels. Apache ORC has the following license: + +Apache ORC +Copyright 2013-2019 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by Hewlett-Packard: +(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency uriparser is statically linked in certain binary +distributions, like the python wheels. uriparser has the following license: + +uriparser - RFC 3986 URI parsing library + +Copyright (C) 2007, Weijia Song +Copyright (C) 2007, Sebastian Pipping +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency zstd is statically linked in certain binary +distributions, like the python wheels. ZSTD has the following license: + +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency lz4 is statically linked in certain binary +distributions, like the python wheels. lz4 has the following license: + +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency Brotli is statically linked in certain binary +distributions, like the python wheels. Brotli has the following license: + +Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency rapidjson is statically linked in certain binary +distributions, like the python wheels. rapidjson and its dependencies have the +following licenses: + +Tencent is pleased to support the open source community by making RapidJSON +available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note +that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please +note that RapidJSON source code is licensed under the MIT License, except for +the third-party components listed below which are subject to different license +terms. Your integration of RapidJSON into your own projects may require +compliance with the MIT License, as well as the other licenses applicable to +the third-party components included within RapidJSON. To avoid the problematic +JSON license in your own projects, it's sufficient to exclude the +bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + + Open Source Software Licensed Under the BSD License: + -------------------------------------------------------------------- + + The msinttypes r29 + Copyright (c) 2006-2013 Alexander Chemeris + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + + Open Source Software Licensed Under the JSON License: + -------------------------------------------------------------------- + + json.org + Copyright (c) 2002 JSON.org + All Rights Reserved. + + JSON_checker + Copyright (c) 2002 JSON.org + All Rights Reserved. + + + Terms of the JSON License: + --------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + The Software shall be used for Good, not Evil. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ + + Terms of the MIT License: + -------------------------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency double-conversion is statically linked in certain binary +distributions, like the python wheels. double-conversion has the following +license: + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency snappy is statically linked in certain binary +distributions, like the python wheels. snappy has the following license: + +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Google Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=== + +Some of the benchmark data in testdata/ is licensed differently: + + - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and + is licensed under the Creative Commons Attribution 3.0 license + (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ + for more information. + + - kppkn.gtb is taken from the Gaviota chess tablebase set, and + is licensed under the MIT License. See + https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 + for more information. + + - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper + “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA + Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, + which is licensed under the CC-BY license. See + http://www.ploscompbiol.org/static/license for more ifnormation. + + - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project + Gutenberg. The first three have expired copyrights and are in the public + domain; the latter does not have expired copyright, but is still in the + public domain according to the license information + (http://www.gutenberg.org/ebooks/53). + +-------------------------------------------------------------------------------- + +3rdparty dependency gflags is statically linked in certain binary +distributions, like the python wheels. gflags has the following license: + +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency glog is statically linked in certain binary +distributions, like the python wheels. glog has the following license: + +Copyright (c) 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +A function gettimeofday in utilities.cc is based on + +http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd + +The license of this code is: + +Copyright (c) 2003-2008, Jouni Malinen and contributors +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name(s) of the above-listed copyright holder(s) nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency re2 is statically linked in certain binary +distributions, like the python wheels. re2 has the following license: + +Copyright (c) 2009 The RE2 Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency c-ares is statically linked in certain binary +distributions, like the python wheels. c-ares has the following license: + +# c-ares license + +Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS +file. + +Copyright 1998 by the Massachusetts Institute of Technology. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of M.I.T. not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. +M.I.T. makes no representations about the suitability of this software for any +purpose. It is provided "as is" without express or implied warranty. + +-------------------------------------------------------------------------------- + +3rdparty dependency zlib is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. In the future +this will likely change to static linkage. 
zlib has the following license: + +zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +-------------------------------------------------------------------------------- + +3rdparty dependency openssl is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. openssl +preceding version 3 has the following license: + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a double license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +-------------------------------------------------------------------------------- + This project includes code from the rtools-backports project. * ci/PKGBUILD and ci/appveyor-build-r.sh are based on code @@ -945,3 +1672,90 @@ Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. All rights reserved. Homepage: https://github.com/r-windows/rtools-backports License: 3-clause BSD + +-------------------------------------------------------------------------------- + +Some code from pandas has been adapted for the pyarrow codebase. pandas is +available under the 3-clause BSD license, which follows: + +pandas license +============== + +Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2008-2011 AQR Capital Management, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the copyright holder nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +Some bits from DyND, in particular aspects of the build system, have been +adapted from libdynd and dynd-python under the terms of the BSD 2-clause +license + +The BSD 2-Clause License + + Copyright (C) 2011-12, Dynamic NDArray Developers + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Dynamic NDArray Developers list: + + * Mark Wiebe + * Continuum Analytics + +-------------------------------------------------------------------------------- + +Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted +for PyArrow. Ibis is released under the Apache License, Version 2.0. diff --git a/NOTICE.txt b/NOTICE.txt index 35fb521507f..a609791374c 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -68,3 +68,17 @@ its NOTICE file: Portions of this software were developed at Cloudera, Inc (http://www.cloudera.com/). + +-------------------------------------------------------------------------------- + +This product includes code from Apache ORC, which includes the following in +its NOTICE file: + + Apache ORC + Copyright 2013-2019 The Apache Software Foundation + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). 
+
+  This product includes software developed by Hewlett-Packard:
+  (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P
diff --git a/README.md b/README.md
index 24157b3f4c7..ff8f52dfcac 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@
 
 [![Build Status](https://travis-ci.org/apache/arrow.svg?branch=master)](https://travis-ci.org/apache/arrow)
 [![Coverage Status](https://codecov.io/gh/apache/arrow/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/arrow?branch=master)
+[![Fuzzit Status](https://app.fuzzit.dev/badge?org_id=yMxZh42xl9qy6bvg3EiJ&branch=master)](https://app.fuzzit.dev/admin/yMxZh42xl9qy6bvg3EiJ/dashboard)
 [![License](http://img.shields.io/:license-Apache%202-blue.svg)](https://github.com/apache/arrow/blob/master/LICENSE.txt)
 [![Twitter Follow](https://img.shields.io/twitter/follow/apachearrow.svg?style=social&label=Follow)](https://twitter.com/apachearrow)
 
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index a2965955714..998103c0550 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -55,7 +55,6 @@ libarrow_glib_la_headers = \
 	buffer.h \
 	chunked-array.h \
 	codec.h \
-	column.h \
 	composite-array.h \
 	composite-data-type.h \
 	data-type.h \
@@ -107,7 +106,6 @@ libarrow_glib_la_sources = \
 	buffer.cpp \
 	chunked-array.cpp \
 	codec.cpp \
-	column.cpp \
 	composite-array.cpp \
 	composite-data-type.cpp \
 	decimal128.cpp \
@@ -153,7 +151,6 @@ libarrow_glib_la_cpp_headers = \
 	buffer.hpp \
 	chunked-array.hpp \
 	codec.hpp \
-	column.hpp \
 	data-type.hpp \
 	decimal128.hpp \
 	error.hpp \
@@ -187,9 +184,13 @@ libarrow_glib_la_cpp_headers += \
 	orc-file-reader.hpp
 endif
 
+libarrow_glib_la_cpp_internal_headers = \
+	internal-index.hpp
+
 libarrow_glib_la_SOURCES = \
 	$(libarrow_glib_la_sources) \
-	$(libarrow_glib_la_cpp_headers)
+	$(libarrow_glib_la_cpp_headers) \
+	$(libarrow_glib_la_cpp_internal_headers)
 
 BUILT_SOURCES = \
 	$(libarrow_glib_la_genearted_headers) \
diff --git a/c_glib/arrow-glib/arrow-glib.h b/c_glib/arrow-glib/arrow-glib.h
index 2a4de13f2da..3f44c665edf 100644
--- a/c_glib/arrow-glib/arrow-glib.h
+++ b/c_glib/arrow-glib/arrow-glib.h
@@ -26,7 +26,6 @@
 #include <arrow-glib/buffer.h>
 #include <arrow-glib/chunked-array.h>
 #include <arrow-glib/codec.h>
-#include <arrow-glib/column.h>
 #include <arrow-glib/composite-array.h>
 #include <arrow-glib/composite-data-type.h>
 #include <arrow-glib/data-type.h>
diff --git a/c_glib/arrow-glib/arrow-glib.hpp b/c_glib/arrow-glib/arrow-glib.hpp
index ac8563cb543..d755b2ba6b1 100644
--- a/c_glib/arrow-glib/arrow-glib.hpp
+++ b/c_glib/arrow-glib/arrow-glib.hpp
@@ -26,7 +26,6 @@
 #include <arrow-glib/buffer.hpp>
 #include <arrow-glib/chunked-array.hpp>
 #include <arrow-glib/codec.hpp>
-#include <arrow-glib/column.hpp>
 #include <arrow-glib/data-type.hpp>
 #include <arrow-glib/decimal128.hpp>
 #include <arrow-glib/error.hpp>
diff --git a/c_glib/arrow-glib/chunked-array.cpp b/c_glib/arrow-glib/chunked-array.cpp
index 6d9598bc106..20437c2c6e7 100644
--- a/c_glib/arrow-glib/chunked-array.cpp
+++ b/c_glib/arrow-glib/chunked-array.cpp
@@ -206,9 +206,25 @@ garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array)
  * @chunked_array: A #GArrowChunkedArray.
  *
  * Returns: The total number of rows in the chunked array.
+ *
+ * Deprecated: 1.0.0: Use garrow_chunked_array_get_n_rows() instead.
  */
 guint64
 garrow_chunked_array_get_length(GArrowChunkedArray *chunked_array)
+{
+  return garrow_chunked_array_get_n_rows(chunked_array);
+}
+
+/**
+ * garrow_chunked_array_get_n_rows:
+ * @chunked_array: A #GArrowChunkedArray.
+ *
+ * Returns: The total number of rows in the chunked array.
+ *
+ * Since: 1.0.0
+ */
+guint64
+garrow_chunked_array_get_n_rows(GArrowChunkedArray *chunked_array)
 {
   const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array);
   return arrow_chunked_array->length();
diff --git a/c_glib/arrow-glib/chunked-array.h b/c_glib/arrow-glib/chunked-array.h
index 882f8f2d3f5..8c67eead2cd 100644
--- a/c_glib/arrow-glib/chunked-array.h
+++ b/c_glib/arrow-glib/chunked-array.h
@@ -44,7 +44,10 @@ garrow_chunked_array_get_value_data_type(GArrowChunkedArray *chunked_array);
 GArrowType
 garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array);
 
+GARROW_DEPRECATED_IN_1_0_FOR(garrow_chunked_array_get_n_rows)
 guint64 garrow_chunked_array_get_length (GArrowChunkedArray *chunked_array);
+GARROW_AVAILABLE_IN_1_0
+guint64 garrow_chunked_array_get_n_rows (GArrowChunkedArray *chunked_array);
 guint64 garrow_chunked_array_get_n_nulls(GArrowChunkedArray *chunked_array);
 guint garrow_chunked_array_get_n_chunks (GArrowChunkedArray *chunked_array);
 
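The two chunked-array hunks above rename the row-count getter: `garrow_chunked_array_get_length()` is deprecated in favor of `garrow_chunked_array_get_n_rows()`, with the old symbol now forwarding to the new one. A minimal caller-side sketch of the migration (the function names come from the diff; the surrounding helper is illustrative only):

```c
#include <arrow-glib/arrow-glib.h>

/* Illustrative helper: report the row count of a chunked array. */
static guint64
report_n_rows(GArrowChunkedArray *chunked_array)
{
  /* Before 1.0.0 (still compiles, but is flagged once
   * GARROW_DEPRECATED_IN_1_0_FOR(garrow_chunked_array_get_n_rows) applies):
   *
   *   return garrow_chunked_array_get_length(chunked_array);
   */
  return garrow_chunked_array_get_n_rows(chunked_array);
}
```

Because the deprecated function simply calls the new one, existing binaries keep working; only recompiling surfaces the deprecation warning.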
diff --git a/c_glib/arrow-glib/column.cpp b/c_glib/arrow-glib/column.cpp
deleted file mode 100644
index 68694b3d679..00000000000
--- a/c_glib/arrow-glib/column.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifdef HAVE_CONFIG_H
-#  include <config.h>
-#endif
-
-#include <arrow-glib/array.hpp>
-#include <arrow-glib/chunked-array.hpp>
-#include <arrow-glib/column.hpp>
-#include <arrow-glib/data-type.hpp>
-#include <arrow-glib/error.hpp>
-#include <arrow-glib/field.hpp>
-
-#include <sstream>
-
-G_BEGIN_DECLS
-
-/**
- * SECTION: column
- * @short_description: Column class
- *
- * #GArrowColumn is a class for column. Column has a #GArrowField and
- * zero or more values. Values are #GArrowChunkedArray.
- */
-
-typedef struct GArrowColumnPrivate_ {
-  std::shared_ptr<arrow::Column> column;
-  GArrowField *field;
-  GArrowArray *array;
-  GArrowChunkedArray *chunked_array;
-} GArrowColumnPrivate;
-
-enum {
-  PROP_0,
-  PROP_COLUMN,
-  PROP_FIELD,
-  PROP_ARRAY,
-  PROP_CHUNKED_ARRAY
-};
-
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowColumn,
-                           garrow_column,
-                           G_TYPE_OBJECT)
-
-#define GARROW_COLUMN_GET_PRIVATE(object)       \
-  static_cast<GArrowColumnPrivate *>(           \
-    garrow_column_get_instance_private(         \
-      GARROW_COLUMN(object)))
-
-static void
-garrow_column_dispose(GObject *object)
-{
-  auto priv = GARROW_COLUMN_GET_PRIVATE(object);
-
-  if (priv->field) {
-    g_object_unref(priv->field);
-    priv->field = nullptr;
-  }
-
-  if (priv->array) {
-    g_object_unref(priv->array);
-    priv->array = nullptr;
-  }
-
-  if (priv->chunked_array) {
-    g_object_unref(priv->chunked_array);
-    priv->chunked_array = nullptr;
-  }
-
-  G_OBJECT_CLASS(garrow_column_parent_class)->dispose(object);
-}
-
-static void
-garrow_column_finalize(GObject *object)
-{
-  auto priv = GARROW_COLUMN_GET_PRIVATE(object);
-
-  priv->column = nullptr;
-
-  G_OBJECT_CLASS(garrow_column_parent_class)->finalize(object);
-}
-
-static void
-garrow_column_set_property(GObject *object,
-                           guint prop_id,
-                           const GValue *value,
-                           GParamSpec *pspec)
-{
-  auto priv = GARROW_COLUMN_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_COLUMN:
-    priv->column =
-      *static_cast<std::shared_ptr<arrow::Column> *>(g_value_get_pointer(value));
-    break;
-  case PROP_FIELD:
-    priv->field = static_cast<GArrowField *>(g_value_dup_object(value));
-    break;
-  case PROP_ARRAY:
-    priv->array = static_cast<GArrowArray *>(g_value_dup_object(value));
-    break;
-  case PROP_CHUNKED_ARRAY:
-    priv->chunked_array =
-      static_cast<GArrowChunkedArray *>(g_value_dup_object(value));
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
-}
-
-static void
-garrow_column_get_property(GObject *object,
-                           guint prop_id,
-                           GValue *value,
-                           GParamSpec *pspec)
-{
-  auto priv = GARROW_COLUMN_GET_PRIVATE(object);
-
-  switch (prop_id) {
-  case PROP_FIELD:
-    g_value_set_object(value, priv->field);
-    break;
-  case PROP_ARRAY:
-    g_value_set_object(value, priv->array);
-    break;
-  case PROP_CHUNKED_ARRAY:
-    g_value_set_object(value, priv->chunked_array);
-    break;
-  default:
-    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
-    break;
-  }
-}
-
-static void
-garrow_column_init(GArrowColumn *object)
-{
-}
-
-static void
-garrow_column_class_init(GArrowColumnClass *klass)
-{
-  auto gobject_class = G_OBJECT_CLASS(klass);
-
-  gobject_class->dispose = garrow_column_dispose;
-  gobject_class->finalize = garrow_column_finalize;
-  gobject_class->set_property = garrow_column_set_property;
-  gobject_class->get_property = garrow_column_get_property;
-
-  GParamSpec *spec;
-  spec = g_param_spec_pointer("column",
-                              "Column",
-                              "The raw std::shared<arrow::Column> *",
-                              static_cast<GParamFlags>(G_PARAM_WRITABLE |
-                                                       G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_COLUMN, spec);
-
-  spec = g_param_spec_object("field",
-                             "Field",
-                             "The field of the column",
-                             GARROW_TYPE_FIELD,
-                             static_cast<GParamFlags>(G_PARAM_READWRITE |
-                                                      G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_FIELD, spec);
-
-  spec = g_param_spec_object("array",
-                             "Array",
-                             "The array of the column",
-                             GARROW_TYPE_ARRAY,
-                             static_cast<GParamFlags>(G_PARAM_READWRITE |
-                                                      G_PARAM_CONSTRUCT_ONLY));
-  g_object_class_install_property(gobject_class, PROP_ARRAY, spec);
-
-  spec = g_param_spec_object("chunked-array",
-                             "Chunked array",
-                             "The chunked array of the column",
-                             GARROW_TYPE_CHUNKED_ARRAY,
-                             static_cast<GParamFlags>(G_PARAM_READWRITE |
G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_CHUNKED_ARRAY, spec); -} - -/** - * garrow_column_new_array: - * @field: The metadata of the column. - * @array: The data of the column. - * - * Returns: A newly created #GArrowColumn. - */ -GArrowColumn * -garrow_column_new_array(GArrowField *field, - GArrowArray *array) -{ - auto arrow_column = - std::make_shared(garrow_field_get_raw(field), - garrow_array_get_raw(array)); - auto column = GARROW_COLUMN(g_object_new(GARROW_TYPE_COLUMN, - "column", &arrow_column, - "field", field, - "array", array, - NULL)); - return column; -} - -/** - * garrow_column_new_chunked_array: - * @field: The metadata of the column. - * @chunked_array: The data of the column. - * - * Returns: A newly created #GArrowColumn. - */ -GArrowColumn * -garrow_column_new_chunked_array(GArrowField *field, - GArrowChunkedArray *chunked_array) -{ - auto arrow_column = - std::make_shared(garrow_field_get_raw(field), - garrow_chunked_array_get_raw(chunked_array)); - auto column = GARROW_COLUMN(g_object_new(GARROW_TYPE_COLUMN, - "column", &arrow_column, - "field", field, - "chunked-array", chunked_array, - NULL)); - return column; -} - -/** - * garrow_column_slice: - * @column: A #GArrowColumn. - * @offset: The offset of sub #GArrowColumn. - * @length: The length of sub #GArrowColumn. - * - * Returns: (transfer full): The sub #GArrowColumn. It covers only from - * `offset` to `offset + length` range. The sub #GArrowColumn shares - * values with the base #GArrowColumn. - */ -GArrowColumn * -garrow_column_slice(GArrowColumn *column, - guint64 offset, - guint64 length) -{ - const auto arrow_column = garrow_column_get_raw(column); - auto arrow_sub_column = arrow_column->Slice(offset, length); - return garrow_column_new_raw(&arrow_sub_column); -} - -/** - * garrow_column_equal: - * @column: A #GArrowColumn. - * @other_column: A #GArrowColumn to be compared. - * - * Returns: %TRUE if both of them have the same data, %FALSE - * otherwise. - * - * Since: 0.4.0 - */ -gboolean -garrow_column_equal(GArrowColumn *column, GArrowColumn *other_column) -{ - const auto arrow_column = garrow_column_get_raw(column); - const auto arrow_other_column = garrow_column_get_raw(other_column); - return arrow_column->Equals(arrow_other_column); -} - -/** - * garrow_column_get_length: - * @column: A #GArrowColumn. - * - * Returns: The number of data of the column. - */ -guint64 -garrow_column_get_length(GArrowColumn *column) -{ - const auto arrow_column = garrow_column_get_raw(column); - return arrow_column->length(); -} - -/** - * garrow_column_get_n_nulls: - * @column: A #GArrowColumn. - * - * Returns: The number of nulls of the column. - */ -guint64 -garrow_column_get_n_nulls(GArrowColumn *column) -{ - const auto arrow_column = garrow_column_get_raw(column); - return arrow_column->null_count(); -} - -/** - * garrow_column_get_field: - * @column: A #GArrowColumn. - * - * Returns: (transfer full): The metadata of the column. - */ -GArrowField * -garrow_column_get_field(GArrowColumn *column) -{ - auto priv = GARROW_COLUMN_GET_PRIVATE(column); - if (priv->field) { - g_object_ref(priv->field); - return priv->field; - } else { - const auto arrow_column = garrow_column_get_raw(column); - auto arrow_field = arrow_column->field(); - auto data_type = garrow_column_get_data_type(column); - auto field = garrow_field_new_raw(&arrow_field, data_type); - g_object_unref(data_type); - return field; - } -} - -/** - * garrow_column_get_name: - * @column: A #GArrowColumn. 
- *
- * Returns: The name of the column.
- */
-const gchar *
-garrow_column_get_name(GArrowColumn *column)
-{
-  const auto arrow_column = garrow_column_get_raw(column);
-  return arrow_column->name().c_str();
-}
-
-/**
- * garrow_column_get_data_type:
- * @column: A #GArrowColumn.
- *
- * Returns: (transfer full): The data type of the column.
- */
-GArrowDataType *
-garrow_column_get_data_type(GArrowColumn *column)
-{
-  const auto arrow_column = garrow_column_get_raw(column);
-  auto arrow_data_type = arrow_column->type();
-  return garrow_data_type_new_raw(&arrow_data_type);
-}
-
-/**
- * garrow_column_get_data:
- * @column: A #GArrowColumn.
- *
- * Returns: (transfer full): The data of the column.
- */
-GArrowChunkedArray *
-garrow_column_get_data(GArrowColumn *column)
-{
-  const auto arrow_column = garrow_column_get_raw(column);
-  auto arrow_chunked_array = arrow_column->data();
-  return garrow_chunked_array_new_raw(&arrow_chunked_array);
-}
-
-/**
- * garrow_column_to_string:
- * @column: A #GArrowColumn.
- * @error: (nullable): Return location for a #GError or %NULL.
- *
- * Returns: (nullable) (transfer full):
- *   The formatted column content or %NULL on error.
- *
- * The returned string should be freed with g_free() when no
- * longer needed.
- *
- * Since: 0.12.0
- */
-gchar *
-garrow_column_to_string(GArrowColumn *column, GError **error)
-{
-  const auto arrow_column = garrow_column_get_raw(column);
-  std::stringstream sink;
-  auto status = arrow::PrettyPrint(*arrow_column, 0, &sink);
-  if (garrow_error_check(error, status, "[column][to-string]")) {
-    return g_strdup(sink.str().c_str());
-  } else {
-    return NULL;
-  }
-}
-
-G_END_DECLS
-
-GArrowColumn *
-garrow_column_new_raw(std::shared_ptr<arrow::Column> *arrow_column)
-{
-  auto column = GARROW_COLUMN(g_object_new(GARROW_TYPE_COLUMN,
-                                           "column", arrow_column,
-                                           NULL));
-  return column;
-}
-
-std::shared_ptr<arrow::Column>
-garrow_column_get_raw(GArrowColumn *column)
-{
-  auto priv = GARROW_COLUMN_GET_PRIVATE(column);
-  return priv->column;
-}
diff --git a/c_glib/arrow-glib/column.h b/c_glib/arrow-glib/column.h
deleted file mode 100644
index 274595858dd..00000000000
--- a/c_glib/arrow-glib/column.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */ - -#pragma once - -#include -#include -#include - -G_BEGIN_DECLS - -#define GARROW_TYPE_COLUMN (garrow_column_get_type()) -G_DECLARE_DERIVABLE_TYPE(GArrowColumn, - garrow_column, - GARROW, - COLUMN, - GObject) -struct _GArrowColumnClass -{ - GObjectClass parent_class; -}; - -GArrowColumn *garrow_column_new_array(GArrowField *field, - GArrowArray *array); -GArrowColumn *garrow_column_new_chunked_array(GArrowField *field, - GArrowChunkedArray *chunked_array); -GArrowColumn *garrow_column_slice(GArrowColumn *column, - guint64 offset, - guint64 length); - -gboolean garrow_column_equal (GArrowColumn *column, - GArrowColumn *other_column); - -guint64 garrow_column_get_length (GArrowColumn *column); -guint64 garrow_column_get_n_nulls (GArrowColumn *column); -GArrowField *garrow_column_get_field (GArrowColumn *column); -const gchar *garrow_column_get_name (GArrowColumn *column); -GArrowDataType *garrow_column_get_data_type (GArrowColumn *column); -GArrowChunkedArray *garrow_column_get_data (GArrowColumn *column); -gchar *garrow_column_to_string (GArrowColumn *column, - GError **error); - -G_END_DECLS diff --git a/c_glib/arrow-glib/error.cpp b/c_glib/arrow-glib/error.cpp index a56b6ec3d13..4c1461543f8 100644 --- a/c_glib/arrow-glib/error.cpp +++ b/c_glib/arrow-glib/error.cpp @@ -65,22 +65,15 @@ garrow_error_code(const arrow::Status &status) return GARROW_ERROR_NOT_IMPLEMENTED; case arrow::StatusCode::SerializationError: return GARROW_ERROR_SERIALIZATION; - case arrow::StatusCode::PythonError: - return GARROW_ERROR_PYTHON; - case arrow::StatusCode::PlasmaObjectExists: - return GARROW_ERROR_PLASMA_OBJECT_EXISTS; - case arrow::StatusCode::PlasmaObjectNonexistent: - return GARROW_ERROR_PLASMA_OBJECT_NONEXISTENT; - case arrow::StatusCode::PlasmaStoreFull: - return GARROW_ERROR_PLASMA_STORE_FULL; - case arrow::StatusCode::PlasmaObjectAlreadySealed: - return GARROW_ERROR_PLASMA_OBJECT_ALREADY_SEALED; case arrow::StatusCode::CodeGenError: return GARROW_ERROR_CODE_GENERATION; case arrow::StatusCode::ExpressionValidationError: return GARROW_ERROR_EXPRESSION_VALIDATION; case arrow::StatusCode::ExecutionError: return GARROW_ERROR_EXECUTION; + case arrow::StatusCode::AlreadyExists: + return GARROW_ERROR_ALREADY_EXISTS; + default: return GARROW_ERROR_UNKNOWN; } diff --git a/c_glib/arrow-glib/error.h b/c_glib/arrow-glib/error.h index 3dea9fc2e10..d600663592f 100644 --- a/c_glib/arrow-glib/error.h +++ b/c_glib/arrow-glib/error.h @@ -35,15 +35,11 @@ G_BEGIN_DECLS * @GARROW_ERROR_UNKNOWN: Unknown error. * @GARROW_ERROR_NOT_IMPLEMENTED: The feature is not implemented. * @GARROW_ERROR_SERIALIZATION: Serialization error. - * @GARROW_ERROR_PYTHON: Python error. - * @GARROW_ERROR_PLASMA_OBJECT_EXISTS: Object already exists on Plasma. - * @GARROW_ERROR_PLASMA_OBJECT_NONEXISTENT: Object doesn't exist on Plasma. - * @GARROW_ERROR_PLASMA_STORE_FULL: Store full error on Plasma. - * @GARROW_ERROR_PLASMA_OBJECT_ALREADY_SEALED: Object already sealed on Plasma. * @GARROW_ERROR_CODE_GENERATION: Error generating code for expression evaluation * in Gandiva. * @GARROW_ERROR_EXPRESSION_VALIDATION: Validation errors in expression given for code generation. * @GARROW_ERROR_EXECUTION: Execution error while evaluating the expression against a record batch. + * @GARROW_ERROR_ALREADY_EXISTS: Item already exists error. * * The error codes are used by all arrow-glib functions. 
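With the Plasma-specific status codes gone, callers match the consolidated code instead. A minimal sketch of the caller side, assuming some arrow-glib call has already failed and filled in the GError (error handling abbreviated):

    #include <arrow-glib/arrow-glib.h>

    static void
    handle_error(GError *error)
    {
      /* GARROW_ERROR_ALREADY_EXISTS subsumes the removed
       * GARROW_ERROR_PLASMA_OBJECT_EXISTS and related codes. */
      if (error->domain == GARROW_ERROR &&
          error->code == GARROW_ERROR_ALREADY_EXISTS) {
        g_print("object already exists\n");
      }
      g_error_free(error);
    }
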
* @@ -60,14 +56,10 @@ typedef enum { GARROW_ERROR_UNKNOWN = 9, GARROW_ERROR_NOT_IMPLEMENTED, GARROW_ERROR_SERIALIZATION, - GARROW_ERROR_PYTHON, - GARROW_ERROR_PLASMA_OBJECT_EXISTS = 20, - GARROW_ERROR_PLASMA_OBJECT_NONEXISTENT, - GARROW_ERROR_PLASMA_STORE_FULL, - GARROW_ERROR_PLASMA_OBJECT_ALREADY_SEALED, GARROW_ERROR_CODE_GENERATION = 40, GARROW_ERROR_EXPRESSION_VALIDATION = 41, GARROW_ERROR_EXECUTION = 42, + GARROW_ERROR_ALREADY_EXISTS = 45, } GArrowError; #define GARROW_ERROR garrow_error_quark() diff --git a/c_glib/arrow-glib/column.hpp b/c_glib/arrow-glib/internal-index.hpp similarity index 78% rename from c_glib/arrow-glib/column.hpp rename to c_glib/arrow-glib/internal-index.hpp index 4ebb742bb50..e3d709fc093 100644 --- a/c_glib/arrow-glib/column.hpp +++ b/c_glib/arrow-glib/internal-index.hpp @@ -19,9 +19,19 @@ #pragma once -#include +#include -#include - -GArrowColumn *garrow_column_new_raw(std::shared_ptr *arrow_column); -std::shared_ptr garrow_column_get_raw(GArrowColumn *column); +static inline bool +garrow_internal_index_adjust(gint &i, const gint max) +{ + if (i < 0) { + i += max; + if (i < 0) { + return false; + } + } + if (i >= max) { + return false; + } + return true; +} diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 4e8e1cc111f..fd426328f8c 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -24,7 +24,6 @@ sources = files( 'buffer.cpp', 'chunked-array.cpp', 'codec.cpp', - 'column.cpp', 'composite-array.cpp', 'composite-data-type.cpp', 'decimal128.cpp', @@ -73,7 +72,6 @@ c_headers = files( 'buffer.h', 'chunked-array.h', 'codec.h', - 'column.h', 'composite-array.h', 'composite-data-type.h', 'data-type.h', @@ -126,7 +124,6 @@ cpp_headers = files( 'buffer.hpp', 'chunked-array.hpp', 'codec.hpp', - 'column.hpp', 'data-type.hpp', 'decimal128.hpp', 'error.hpp', @@ -165,6 +162,10 @@ if have_arrow_orc ) endif +cpp_internal_headers = files( + 'internal-index.hpp', +) + version_h_conf = configuration_data() version_h_conf.set('GARROW_VERSION_MAJOR', version_major) version_h_conf.set('GARROW_VERSION_MINOR', version_minor) diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp index 88af8c7b37f..7783362104d 100644 --- a/c_glib/arrow-glib/reader.cpp +++ b/c_glib/arrow-glib/reader.cpp @@ -21,18 +21,18 @@ # include #endif -#include +#include +#include #include #include #include -#include -#include -#include - #include - +#include #include #include +#include +#include +#include G_BEGIN_DECLS @@ -734,9 +734,11 @@ garrow_feather_file_reader_get_n_columns(GArrowFeatherFileReader *reader) /** * garrow_feather_file_reader_get_column_name: * @reader: A #GArrowFeatherFileReader. - * @i: The index of the target column. + * @i: The index of the target column. If it's negative, index is + * counted backward from the end of the columns. `-1` means the last + * column. * - * Returns: (transfer full): The i-th column name in the file. + * Returns: (nullable) (transfer full): The i-th column name in the file. * * It should be freed with g_free() when no longer needed. 
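The negative-index rule documented here is shared by the readers and containers below through the new internal-index.hpp helper. An illustrative C rendering of its logic (the real helper is C++ and takes a gint reference):

    #include <glib.h>

    /* Mirrors garrow_internal_index_adjust(): a negative index counts
     * back from the end; out-of-range indexes report failure. */
    static gboolean
    index_adjust(gint *i, gint max)
    {
      if (*i < 0) {
        *i += max;
        if (*i < 0) {
          return FALSE;
        }
      }
      return *i < max;
    }

    /* With max == 3: 1 stays 1, -1 becomes 2, and 3 or -4 fail. */
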
* @@ -747,69 +749,52 @@ garrow_feather_file_reader_get_column_name(GArrowFeatherFileReader *reader, gint i) { auto arrow_reader = garrow_feather_file_reader_get_raw(reader); - auto column_name = arrow_reader->GetColumnName(i); + if (!garrow_internal_index_adjust(i, arrow_reader->num_columns())) { + return NULL; + } + const auto &column_name = arrow_reader->GetColumnName(i); return g_strndup(column_name.data(), column_name.size()); } /** - * garrow_feather_file_reader_get_column: + * garrow_feather_file_reader_get_column_data: * @reader: A #GArrowFeatherFileReader. - * @i: The index of the target column. + * @i: The index of the target column. If it's negative, index is + * counted backward from the end of the columns. `-1` means the last + * column. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable) (transfer full): - * The i-th column in the file or %NULL on error. + * The i-th column's data in the file or %NULL on error. * - * Since: 0.4.0 + * Since: 1.0.0 */ -GArrowColumn * -garrow_feather_file_reader_get_column(GArrowFeatherFileReader *reader, - gint i, - GError **error) +GArrowChunkedArray * +garrow_feather_file_reader_get_column_data(GArrowFeatherFileReader *reader, + gint i, + GError **error) { + const auto tag = "[feather-file-reader][get-column-data]"; auto arrow_reader = garrow_feather_file_reader_get_raw(reader); - std::shared_ptr arrow_column; - auto status = arrow_reader->GetColumn(i, &arrow_column); - if (garrow_error_check(error, status, "[feather-file-reader][get-column]")) { - return garrow_column_new_raw(&arrow_column); - } else { + const auto n_columns = arrow_reader->num_columns(); + if (!garrow_internal_index_adjust(i, n_columns)) { + garrow_error_check(error, + arrow::Status::IndexError("Out of index: " + "<0..", n_columns, ">: " + "<", i, ">"), + tag); return NULL; } -} -/** - * garrow_feather_file_reader_get_columns: - * @reader: A #GArrowFeatherFileReader. - * @error: (nullable): Return location for a #GError or %NULL. - * - * Returns: (element-type GArrowColumn) (transfer full): - * The columns in the file. 
- * - * Since: 0.4.0 - */ -GList * -garrow_feather_file_reader_get_columns(GArrowFeatherFileReader *reader, - GError **error) -{ - GList *columns = NULL; - auto arrow_reader = garrow_feather_file_reader_get_raw(reader); - auto n_columns = arrow_reader->num_columns(); - for (gint i = 0; i < n_columns; ++i) { - std::shared_ptr arrow_column; - auto status = arrow_reader->GetColumn(i, &arrow_column); - if (!garrow_error_check(error, - status, - "[feather-file-reader][get-columns]")) { - g_list_foreach(columns, (GFunc)g_object_unref, NULL); - g_list_free(columns); - return NULL; - } - columns = g_list_prepend(columns, - garrow_column_new_raw(&arrow_column)); + std::shared_ptr arrow_chunked_array; + auto status = arrow_reader->GetColumn(i, &arrow_chunked_array); + if (garrow_error_check(error, status, tag)) { + return garrow_chunked_array_new_raw(&arrow_chunked_array); + } else { + return NULL; } - return g_list_reverse(columns); } /** diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h index c2cce2dcc25..ff83e247890 100644 --- a/c_glib/arrow-glib/reader.h +++ b/c_glib/arrow-glib/reader.h @@ -222,13 +222,11 @@ gint64 garrow_feather_file_reader_get_n_columns( gchar *garrow_feather_file_reader_get_column_name( GArrowFeatherFileReader *reader, gint i); -GArrowColumn *garrow_feather_file_reader_get_column( - GArrowFeatherFileReader *reader, - gint i, - GError **error); -GList *garrow_feather_file_reader_get_columns( - GArrowFeatherFileReader *reader, - GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowChunkedArray * +garrow_feather_file_reader_get_column_data(GArrowFeatherFileReader *reader, + gint i, + GError **error); GArrowTable * garrow_feather_file_reader_read(GArrowFeatherFileReader *reader, GError **error); diff --git a/c_glib/arrow-glib/record-batch.cpp b/c_glib/arrow-glib/record-batch.cpp index 04d442b409a..e566514e547 100644 --- a/c_glib/arrow-glib/record-batch.cpp +++ b/c_glib/arrow-glib/record-batch.cpp @@ -23,29 +23,13 @@ #include #include +#include +#include #include #include -#include #include -static inline bool -garrow_record_batch_adjust_index(const std::shared_ptr arrow_record_batch, - gint &i) -{ - auto n_columns = arrow_record_batch->num_columns(); - if (i < 0) { - i += n_columns; - if (i < 0) { - return false; - } - } - if (i >= n_columns) { - return false; - } - return true; -} - G_BEGIN_DECLS /** @@ -215,7 +199,7 @@ garrow_record_batch_get_schema(GArrowRecordBatch *record_batch) } /** - * garrow_record_batch_get_column: + * garrow_record_batch_get_column_data: * @record_batch: A #GArrowRecordBatch. * @i: The index of the target column. If it's negative, index is * counted backward from the end of the columns. `-1` means the last @@ -223,41 +207,21 @@ garrow_record_batch_get_schema(GArrowRecordBatch *record_batch) * * Returns: (transfer full) (nullable): The i-th column in the record batch * on success, %NULL on out of index. 
+ * + * Since: 1.0.0 */ GArrowArray * -garrow_record_batch_get_column(GArrowRecordBatch *record_batch, - gint i) +garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, + gint i) { - const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); - if (!garrow_record_batch_adjust_index(arrow_record_batch, i)) { + const auto &arrow_record_batch = garrow_record_batch_get_raw(record_batch); + if (!garrow_internal_index_adjust(i, arrow_record_batch->num_columns())) { return NULL; } auto arrow_column = arrow_record_batch->column(i); return garrow_array_new_raw(&arrow_column); } -/** - * garrow_record_batch_get_columns: - * @record_batch: A #GArrowRecordBatch. - * - * Returns: (element-type GArrowArray) (transfer full): - * The columns in the record batch. - */ -GList * -garrow_record_batch_get_columns(GArrowRecordBatch *record_batch) -{ - const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); - - GList *columns = NULL; - for (int i = 0; i < arrow_record_batch->num_columns(); ++i) { - auto arrow_column = arrow_record_batch->column(i); - GArrowArray *column = garrow_array_new_raw(&arrow_column); - columns = g_list_prepend(columns, column); - } - - return g_list_reverse(columns); -} - /** * garrow_record_batch_get_column_name: * @record_batch: A #GArrowRecordBatch. @@ -272,8 +236,8 @@ const gchar * garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, gint i) { - const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); - if (!garrow_record_batch_adjust_index(arrow_record_batch, i)) { + const auto &arrow_record_batch = garrow_record_batch_get_raw(record_batch); + if (!garrow_internal_index_adjust(i, arrow_record_batch->num_columns())) { return NULL; } return arrow_record_batch->column_name(i).c_str(); diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h index b01abf78904..b40a2aecfbe 100644 --- a/c_glib/arrow-glib/record-batch.h +++ b/c_glib/arrow-glib/record-batch.h @@ -44,9 +44,9 @@ gboolean garrow_record_batch_equal(GArrowRecordBatch *record_batch, GArrowRecordBatch *other_record_batch); GArrowSchema *garrow_record_batch_get_schema (GArrowRecordBatch *record_batch); -GArrowArray *garrow_record_batch_get_column (GArrowRecordBatch *record_batch, +GARROW_AVAILABLE_IN_1_0 +GArrowArray *garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, gint i); -GList *garrow_record_batch_get_columns (GArrowRecordBatch *record_batch); const gchar *garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, gint i); guint garrow_record_batch_get_n_columns (GArrowRecordBatch *record_batch); diff --git a/c_glib/arrow-glib/schema.cpp b/c_glib/arrow-glib/schema.cpp index 1bbe82f9a3c..5730dee8ce7 100644 --- a/c_glib/arrow-glib/schema.cpp +++ b/c_glib/arrow-glib/schema.cpp @@ -198,6 +198,23 @@ garrow_schema_get_field_by_name(GArrowSchema *schema, } } +/** + * garrow_schema_get_field_index: + * @schema: A #GArrowSchema. + * @name: The name of the field to be found. + * + * Returns: The index of the found field, -1 on not found. + * + * Since: 1.0.0 + */ +gint +garrow_schema_get_field_index(GArrowSchema *schema, + const gchar *name) +{ + const auto &arrow_schema = garrow_schema_get_raw(schema); + return arrow_schema->GetFieldIndex(std::string(name)); +} + /** * garrow_schema_n_fields: * @schema: A #GArrowSchema. 
diff --git a/c_glib/arrow-glib/schema.h b/c_glib/arrow-glib/schema.h index d5e27bbad98..745d266b21f 100644 --- a/c_glib/arrow-glib/schema.h +++ b/c_glib/arrow-glib/schema.h @@ -42,6 +42,9 @@ GArrowField *garrow_schema_get_field (GArrowSchema *schema, guint i); GArrowField *garrow_schema_get_field_by_name(GArrowSchema *schema, const gchar *name); +GARROW_AVAILABLE_IN_1_0 +gint garrow_schema_get_field_index (GArrowSchema *schema, + const gchar *name); guint garrow_schema_n_fields (GArrowSchema *schema); GList *garrow_schema_get_fields (GArrowSchema *schema); diff --git a/c_glib/arrow-glib/table.cpp b/c_glib/arrow-glib/table.cpp index a29d18bc402..511a2354d0d 100644 --- a/c_glib/arrow-glib/table.cpp +++ b/c_glib/arrow-glib/table.cpp @@ -22,8 +22,10 @@ #endif #include -#include +#include #include +#include +#include #include #include #include @@ -37,7 +39,7 @@ G_BEGIN_DECLS * @short_description: Table class * * #GArrowTable is a class for table. Table has zero or more - * #GArrowColumns and zero or more records. + * #GArrowChunkedArrays and zero or more records. */ typedef struct GArrowTablePrivate_ { @@ -129,36 +131,12 @@ garrow_table_class_init(GArrowTableClass *klass) g_object_class_install_property(gobject_class, PROP_TABLE, spec); } -/** - * garrow_table_new: - * @schema: The schema of the table. - * @columns: (element-type GArrowColumn): The columns of the table. - * - * Returns: A newly created #GArrowTable. - * - * Deprecated: 0.12.0: Use garrow_table_new_values() instead. - */ -GArrowTable * -garrow_table_new(GArrowSchema *schema, - GList *columns) -{ - auto arrow_schema = garrow_schema_get_raw(schema); - std::vector> arrow_columns; - for (GList *node = columns; node; node = node->next) { - auto column = GARROW_COLUMN(node->data); - arrow_columns.push_back(garrow_column_get_raw(column)); - } - - auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); - return garrow_table_new_raw(&arrow_table); -} - /** * garrow_table_new_values: (skip) * @schema: The schema of the table. - * @values: The values of the table. All values must be instance of the - * same class. Available classes are #GArrowColumn, #GArrowArray and - * #GArrowRecordBatch. + * @values: The values of the table. All values must be instance of + * the same class. Available classes are #GArrowChunkedArray, + * #GArrowArray and #GArrowRecordBatch. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable): A newly created #GArrowTable or %NULL on error. 
@@ -172,13 +150,13 @@ garrow_table_new_values(GArrowSchema *schema, { const auto context = "[table][new][values]"; auto arrow_schema = garrow_schema_get_raw(schema); - std::vector> arrow_columns; + std::vector> arrow_chunked_arrays; std::vector> arrow_arrays; std::vector> arrow_record_batches; for (GList *node = values; node; node = node->next) { - if (GARROW_IS_COLUMN(node->data)) { - auto column = GARROW_COLUMN(node->data); - arrow_columns.push_back(garrow_column_get_raw(column)); + if (GARROW_IS_CHUNKED_ARRAY(node->data)) { + auto chunked_array = GARROW_CHUNKED_ARRAY(node->data); + arrow_chunked_arrays.push_back(garrow_chunked_array_get_raw(chunked_array)); } else if (GARROW_IS_ARRAY(node->data)) { auto array = GARROW_ARRAY(node->data); arrow_arrays.push_back(garrow_array_get_raw(array)); @@ -192,13 +170,13 @@ garrow_table_new_values(GArrowSchema *schema, "%s: %s", context, "value must be one of " - "GArrowColumn, GArrowArray and GArrowRecordBatch"); + "GArrowChunkedArray, GArrowArray and GArrowRecordBatch"); return NULL; } } size_t n_types = 0; - if (!arrow_columns.empty()) { + if (!arrow_chunked_arrays.empty()) { ++n_types; } if (!arrow_arrays.empty()) { @@ -214,12 +192,12 @@ garrow_table_new_values(GArrowSchema *schema, "%s: %s", context, "all values must be the same objects of " - "GArrowColumn, GArrowArray or GArrowRecordBatch"); + "GArrowChunkedArray, GArrowArray or GArrowRecordBatch"); return NULL; } - if (!arrow_columns.empty()) { - auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); + if (!arrow_chunked_arrays.empty()) { + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_chunked_arrays); auto status = arrow_table->Validate(); if (garrow_error_check(error, status, context)) { return garrow_table_new_raw(&arrow_table); @@ -248,31 +226,33 @@ garrow_table_new_values(GArrowSchema *schema, } /** - * garrow_table_new_columns: + * garrow_table_new_chunked_arrays: * @schema: The schema of the table. - * @columns: (array length=n_columns): The columns of the table. - * @n_columns: The number of columns. + * @chunked_arrays: (array length=n_chunked_arrays): The chunked arrays of + * the table. + * @n_chunked_arrays: The number of chunked arrays. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable): A newly created #GArrowTable or %NULL on error. 
* - * Since: 0.12.0 + * Since: 1.0.0 */ GArrowTable * -garrow_table_new_columns(GArrowSchema *schema, - GArrowColumn **columns, - gsize n_columns, - GError **error) +garrow_table_new_chunked_arrays(GArrowSchema *schema, + GArrowChunkedArray **chunked_arrays, + gsize n_chunked_arrays, + GError **error) { auto arrow_schema = garrow_schema_get_raw(schema); - std::vector> arrow_columns; - for (gsize i = 0; i < n_columns; ++i) { - arrow_columns.push_back(garrow_column_get_raw(columns[i])); + std::vector> arrow_chunked_arrays; + for (gsize i = 0; i < n_chunked_arrays; ++i) { + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_arrays[i]); + arrow_chunked_arrays.push_back(arrow_chunked_array); } - auto arrow_table = arrow::Table::Make(arrow_schema, arrow_columns); + auto arrow_table = arrow::Table::Make(arrow_schema, arrow_chunked_arrays); auto status = arrow_table->Validate(); - if (garrow_error_check(error, status, "[table][new][columns]")) { + if (garrow_error_check(error, status, "[table][new][chunked-arrays]")) { return garrow_table_new_raw(&arrow_table); } else { return NULL; @@ -380,19 +360,26 @@ garrow_table_get_schema(GArrowTable *table) } /** - * garrow_table_get_column: + * garrow_table_get_column_data: * @table: A #GArrowTable. - * @i: The index of the target column. + * @i: The index of the target column. If it's negative, index is + * counted backward from the end of the columns. `-1` means the last + * column. * - * Returns: (transfer full): The i-th column in the table. + * Returns: (nullable) (transfer full): The i-th column's data in the table. + * + * Since: 1.0.0 */ -GArrowColumn * -garrow_table_get_column(GArrowTable *table, - guint i) +GArrowChunkedArray * +garrow_table_get_column_data(GArrowTable *table, + gint i) { - const auto arrow_table = garrow_table_get_raw(table); + const auto &arrow_table = garrow_table_get_raw(table); + if (!garrow_internal_index_adjust(i, arrow_table->num_columns())) { + return NULL; + } auto arrow_column = arrow_table->column(i); - return garrow_column_new_raw(&arrow_column); + return garrow_chunked_array_new_raw(&arrow_column); } /** @@ -425,24 +412,30 @@ garrow_table_get_n_rows(GArrowTable *table) * garrow_table_add_column: * @table: A #GArrowTable. * @i: The index of the new column. - * @column: The column to be added. + * @field: The field for the column to be added. + * @chunked_array: The column data to be added. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable) (transfer full): The newly allocated * #GArrowTable that has a new column or %NULL on error. * - * Since: 0.3.0 + * Since: 1.0.0 */ GArrowTable * garrow_table_add_column(GArrowTable *table, guint i, - GArrowColumn *column, + GArrowField *field, + GArrowChunkedArray *chunked_array, GError **error) { const auto arrow_table = garrow_table_get_raw(table); - const auto arrow_column = garrow_column_get_raw(column); + const auto arrow_field = garrow_field_get_raw(field); + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); std::shared_ptr arrow_new_table; - auto status = arrow_table->AddColumn(i, arrow_column, &arrow_new_table); + auto status = arrow_table->AddColumn(i, + arrow_field, + arrow_chunked_array, + &arrow_new_table); if (garrow_error_check(error, status, "[table][add-column]")) { return garrow_table_new_raw(&arrow_new_table); } else { @@ -480,25 +473,31 @@ garrow_table_remove_column(GArrowTable *table, * garrow_table_replace_column: * @table: A #GArrowTable. * @i: The index of the column to be replaced. 
- * @column: The newly added #GArrowColumn. + * @field: The field for the new column. + * @chunked_array: The newly added column data. * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable) (transfer full): The newly allocated * #GArrowTable that has @column as the @i-th column or %NULL on * error. * - * Since: 0.10.0 + * Since: 1.0.0 */ GArrowTable * garrow_table_replace_column(GArrowTable *table, guint i, - GArrowColumn *column, + GArrowField *field, + GArrowChunkedArray *chunked_array, GError **error) { const auto arrow_table = garrow_table_get_raw(table); - const auto arrow_column = garrow_column_get_raw(column); + const auto arrow_field = garrow_field_get_raw(field); + const auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); std::shared_ptr arrow_new_table; - auto status = arrow_table->SetColumn(i, arrow_column, &arrow_new_table); + auto status = arrow_table->SetColumn(i, + arrow_field, + arrow_chunked_array, + &arrow_new_table); if (garrow_error_check(error, status, "[table][replace-column]")) { return garrow_table_new_raw(&arrow_new_table); } else { diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h index f802637c737..f24414b6578 100644 --- a/c_glib/arrow-glib/table.h +++ b/c_glib/arrow-glib/table.h @@ -19,7 +19,7 @@ #pragma once -#include +#include #include #include #include @@ -37,23 +37,17 @@ struct _GArrowTableClass GObjectClass parent_class; }; -#ifndef GARROW_DISABLE_DEPRECATED -GARROW_DEPRECATED_IN_0_12_FOR(garrow_table_new_values) -GArrowTable * -garrow_table_new(GArrowSchema *schema, - GList *columns); -#endif GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_values(GArrowSchema *schema, GList *values, GError **error); -GARROW_AVAILABLE_IN_0_12 +GARROW_AVAILABLE_IN_1_0 GArrowTable * -garrow_table_new_columns(GArrowSchema *schema, - GArrowColumn **columns, - gsize n_columns, - GError **error); +garrow_table_new_chunked_arrays(GArrowSchema *schema, + GArrowChunkedArray **chunked_arrays, + gsize n_chunked_arrays, + GError **error); GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_arrays(GArrowSchema *schema, @@ -71,21 +65,28 @@ gboolean garrow_table_equal (GArrowTable *table, GArrowTable *other_table); GArrowSchema *garrow_table_get_schema (GArrowTable *table); -GArrowColumn *garrow_table_get_column (GArrowTable *table, - guint i); +GARROW_AVAILABLE_IN_1_0 +GArrowChunkedArray * +garrow_table_get_column_data(GArrowTable *table, + gint i); + guint garrow_table_get_n_columns (GArrowTable *table); guint64 garrow_table_get_n_rows (GArrowTable *table); +GARROW_AVAILABLE_IN_1_0 GArrowTable *garrow_table_add_column (GArrowTable *table, guint i, - GArrowColumn *column, + GArrowField *field, + GArrowChunkedArray *chunked_array, GError **error); GArrowTable *garrow_table_remove_column (GArrowTable *table, guint i, GError **error); +GARROW_AVAILABLE_IN_1_0 GArrowTable *garrow_table_replace_column(GArrowTable *table, guint i, - GArrowColumn *column, + GArrowField *field, + GArrowChunkedArray *chunked_array, GError **error); gchar *garrow_table_to_string (GArrowTable *table, GError **error); diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index 43a89dcff1d..dc925dd9252 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -110,6 +110,15 @@ # define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif +/** + * GARROW_VERSION_1_0: + * + * You can use this macro value for compile time API version check. 
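For example, a hypothetical consumer pinned to the 0.14 API surface could set the allowed range before including the headers, so that symbols guarded by GARROW_AVAILABLE_IN_1_0 trigger compiler warnings:

    /* Hypothetical consumer snippet: pin the allowed API range. */
    #define GARROW_VERSION_MIN_REQUIRED GARROW_VERSION_0_14
    #define GARROW_VERSION_MAX_ALLOWED  GARROW_VERSION_0_14
    #include <arrow-glib/arrow-glib.h>
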
+ *
+ * Since: 1.0.0
+ */
+#define GARROW_VERSION_1_0 G_ENCODE_VERSION(1, 0)
+
 /**
  * GARROW_VERSION_0_14:
  *
@@ -193,6 +202,20 @@
 
 #define GARROW_AVAILABLE_IN_ALL
 
+#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_1_0
+# define GARROW_DEPRECATED_IN_1_0 GARROW_DEPRECATED
+# define GARROW_DEPRECATED_IN_1_0_FOR(function) GARROW_DEPRECATED_FOR(function)
+#else
+# define GARROW_DEPRECATED_IN_1_0
+# define GARROW_DEPRECATED_IN_1_0_FOR(function)
+#endif
+
+#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_1_0
+# define GARROW_AVAILABLE_IN_1_0 GARROW_UNAVAILABLE(1, 0)
+#else
+# define GARROW_AVAILABLE_IN_1_0
+#endif
+
 #if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_14
 # define GARROW_DEPRECATED_IN_0_14 GARROW_DEPRECATED
 # define GARROW_DEPRECATED_IN_0_14_FOR(function) GARROW_DEPRECATED_FOR(function)
diff --git a/c_glib/configure.ac b/c_glib/configure.ac
index faf7b9bd2dd..ba3909c6b10 100644
--- a/c_glib/configure.ac
+++ b/c_glib/configure.ac
@@ -17,7 +17,7 @@
 
 AC_PREREQ(2.65)
 
-m4_define([arrow_glib_version], 0.14.0)
+m4_define([arrow_glib_version], 1.0.0-SNAPSHOT)
 AC_INIT([arrow-glib],
         arrow_glib_version,
         [https://issues.apache.org/jira/browse/ARROW],
diff --git a/c_glib/doc/arrow-glib/arrow-glib-docs.xml b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
index 7429a22aaba..47e96a4ec19 100644
--- a/c_glib/doc/arrow-glib/arrow-glib-docs.xml
+++ b/c_glib/doc/arrow-glib/arrow-glib-docs.xml
@@ -71,7 +71,6 @@
     Table
-
@@ -160,6 +159,10 @@
     Index of deprecated API
+
+    Index of new symbols in 1.0.0
+
+
     Index of new symbols in 0.14.0
diff --git a/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml b/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml
index 81bbb08c3e9..ac9f686ac27 100644
--- a/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml
+++ b/c_glib/doc/gandiva-glib/gandiva-glib-docs.xml
@@ -84,6 +84,10 @@
     Index of deprecated API
+
+    Index of new symbols in 1.0.0
+
+
     Index of new symbols in 0.14.0
diff --git a/c_glib/doc/parquet-glib/parquet-glib-docs.xml b/c_glib/doc/parquet-glib/parquet-glib-docs.xml
index 4485a6765cb..d58e92eacd1 100644
--- a/c_glib/doc/parquet-glib/parquet-glib-docs.xml
+++ b/c_glib/doc/parquet-glib/parquet-glib-docs.xml
@@ -57,6 +57,10 @@
     Index of deprecated API
+
+    Index of new symbols in 1.0.0
+
+
     Index of new symbols in 0.12.0
diff --git a/c_glib/example/lua/read-batch.lua b/c_glib/example/lua/read-batch.lua
index 8dc2fd82b39..a4c86763f08 100644
--- a/c_glib/example/lua/read-batch.lua
+++ b/c_glib/example/lua/read-batch.lua
@@ -28,14 +28,14 @@ for i = 0, reader:get_n_record_batches() - 1 do
   print(string.rep("=", 40))
   print("record-batch["..i.."]:")
   for j = 0, record_batch:get_n_columns() - 1 do
-    local column = record_batch:get_column(j)
     local column_name = record_batch:get_column_name(j)
+    local column_data = record_batch:get_column_data(j)
     io.write(" "..column_name..": [")
     for k = 0, record_batch:get_n_rows() - 1 do
       if k > 0 then
         io.write(", ")
       end
-      io.write(column:get_value(k))
+      io.write(column_data:get_value(k))
     end
     print("]")
   end
diff --git a/c_glib/example/lua/read-stream.lua b/c_glib/example/lua/read-stream.lua
index e67acf506ff..7bf1083e225 100644
--- a/c_glib/example/lua/read-stream.lua
+++ b/c_glib/example/lua/read-stream.lua
@@ -33,14 +33,14 @@ while true do
   print(string.rep("=", 40))
   print("record-batch["..i.."]:")
   for j = 0, record_batch:get_n_columns() - 1 do
-    local column = record_batch:get_column(j)
     local column_name = record_batch:get_column_name(j)
+    local column_data = record_batch:get_column_data(j)
     io.write(" "..column_name..": [")
     for k = 0, record_batch:get_n_rows() - 1 do
if k > 0 then io.write(", ") end - io.write(column:get_value(k)) + io.write(column_data:get_value(k)) end print("]") end diff --git a/c_glib/example/read-batch.c b/c_glib/example/read-batch.c index 4382816e048..273dc70ffa5 100644 --- a/c_glib/example/read-batch.c +++ b/c_glib/example/read-batch.c @@ -78,8 +78,9 @@ print_record_batch(GArrowRecordBatch *record_batch) g_print("columns[%u](%s): ", nth_column, garrow_record_batch_get_column_name(record_batch, nth_column)); - array = garrow_record_batch_get_column(record_batch, nth_column); + array = garrow_record_batch_get_column_data(record_batch, nth_column); print_array(array); + g_object_unref(array); } } diff --git a/c_glib/example/read-stream.c b/c_glib/example/read-stream.c index 92411bcc780..133418faa90 100644 --- a/c_glib/example/read-stream.c +++ b/c_glib/example/read-stream.c @@ -78,8 +78,9 @@ print_record_batch(GArrowRecordBatch *record_batch) g_print("columns[%u](%s): ", nth_column, garrow_record_batch_get_column_name(record_batch, nth_column)); - array = garrow_record_batch_get_column(record_batch, nth_column); + array = garrow_record_batch_get_column_data(record_batch, nth_column); print_array(array); + g_object_unref(array); } } diff --git a/c_glib/gandiva-glib/native-function.cpp b/c_glib/gandiva-glib/native-function.cpp index d0691471bca..5ff265f1232 100644 --- a/c_glib/gandiva-glib/native-function.cpp +++ b/c_glib/gandiva-glib/native-function.cpp @@ -93,21 +93,25 @@ ggandiva_native_function_class_init(GGandivaNativeFunctionClass *klass) } /** - * ggandiva_native_function_get_signature: + * ggandiva_native_function_get_signatures: * @native_function: A #GGandivaNativeFunction. * - * Returns: (transfer full): A #GGandivaFunctionSignature that represents - * the signature of the native function. + * Returns: (element-type GGandivaFunctionSignature) (transfer full): + * A list of #GGandivaFunctionSignature supported by the native function. * - * Since: 0.14.0 + * Since: 1.0.0 */ -GGandivaFunctionSignature * -ggandiva_native_function_get_signature(GGandivaNativeFunction *native_function) +GList * +ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function) { auto gandiva_native_function = ggandiva_native_function_get_raw(native_function); - auto &gandiva_function_signature = gandiva_native_function->signature(); - return ggandiva_function_signature_new_raw(&gandiva_function_signature); + GList *signatures = nullptr; + for (auto &gandiva_signature : gandiva_native_function->signatures()) { + auto signature = ggandiva_function_signature_new_raw(&gandiva_signature); + signatures = g_list_prepend(signatures, signature); + } + return g_list_reverse(signatures); } /** @@ -135,7 +139,7 @@ ggandiva_native_function_equal(GGandivaNativeFunction *native_function, * @native_function: A #GGandivaNativeFunction. * * Returns: (transfer full): - * The string representation of the signature of the native function. + * The string representation of the signatures of the native function. * It should be freed with g_free() when no longer needed. 
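Callers of the old singular accessor now receive a list; a sketch of iterating it, assuming the list and its elements are transferred to the caller as the annotation above states:

    #include <gandiva-glib/gandiva-glib.h>

    static void
    print_signatures(GGandivaNativeFunction *function)
    {
      GList *signatures = ggandiva_native_function_get_signatures(function);
      for (GList *node = signatures; node; node = node->next) {
        GGandivaFunctionSignature *signature = node->data;
        gchar *string = ggandiva_function_signature_to_string(signature);
        g_print("%s\n", string);
        g_free(string);
      }
      g_list_free_full(signatures, g_object_unref);
    }
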
 *
 * Since: 0.14.0
@@ -145,8 +149,17 @@ ggandiva_native_function_to_string(GGandivaNativeFunction *native_function)
 {
   auto gandiva_native_function =
     ggandiva_native_function_get_raw(native_function);
-  auto gandiva_function_signature = gandiva_native_function->signature();
-  return g_strdup(gandiva_function_signature.ToString().c_str());
+  auto string = g_string_new(NULL);
+  for (auto &gandiva_signature : gandiva_native_function->signatures()) {
+    if (string->len > 0) {
+      g_string_append(string, ", ");
+    }
+    const auto &signature_string = gandiva_signature.ToString();
+    g_string_append_len(string,
+                        signature_string.data(),
+                        signature_string.length());
+  }
+  return g_string_free(string, FALSE);
 }
 
 /**
diff --git a/c_glib/gandiva-glib/native-function.h b/c_glib/gandiva-glib/native-function.h
index a7ffb60a4ce..8b4d6a44c80 100644
--- a/c_glib/gandiva-glib/native-function.h
+++ b/c_glib/gandiva-glib/native-function.h
@@ -51,7 +51,8 @@ struct _GGandivaNativeFunctionClass
 {
   GObjectClass parent_class;
 };
 
-GGandivaFunctionSignature *ggandiva_native_function_get_signature(GGandivaNativeFunction *native_function);
+GList *
+ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function);
 
 gboolean
 ggandiva_native_function_equal(GGandivaNativeFunction *native_function,
                                GGandivaNativeFunction *other_native_function);
diff --git a/c_glib/meson.build b/c_glib/meson.build
index 3696aba26cb..abb39daffb8 100644
--- a/c_glib/meson.build
+++ b/c_glib/meson.build
@@ -23,7 +23,7 @@ project('arrow-glib', 'c', 'cpp',
     'cpp_std=c++11',
   ])
 
-version = '0.14.0'
+version = '1.0.0-SNAPSHOT'
 if version.endswith('-SNAPSHOT')
   version_numbers = version.split('-')[0].split('.')
   version_tag = version.split('-')[1]
diff --git a/c_glib/parquet-glib/arrow-file-reader.cpp b/c_glib/parquet-glib/arrow-file-reader.cpp
index 5c16e827fc1..217bd190d51 100644
--- a/c_glib/parquet-glib/arrow-file-reader.cpp
+++ b/c_glib/parquet-glib/arrow-file-reader.cpp
@@ -22,6 +22,7 @@
 #endif
 
 #include
+#include
 
 #include
 
@@ -252,7 +253,7 @@ gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader,
  * gparquet_arrow_file_reader_select_schema:
  * @reader: A #GParquetArrowFileReader.
  * @column_indexes: (array length=n_column_indexes):
- *   The array of column indexes to be selected
+ *   The array of column indexes to be selected.
  * @n_column_indexes: The length of `column_indexes`.
  * @error: (nullable): Return location for a #GError or %NULL.
  *
@@ -285,42 +286,44 @@ gparquet_arrow_file_reader_select_schema(GParquetArrowFileReader *reader,
 }
 
 /**
- * gparquet_arrow_file_reader_read_column:
+ * gparquet_arrow_file_reader_read_column_data:
  * @reader: A #GParquetArrowFileReader.
- * @column_index: Index integer of the column to be read.
+ * @i: The index of the column to be read. If it's negative, index is
+ * counted backward from the end of the columns. `-1` means the last
+ * column.
  * @error: (nullable): Return location for a #GError or %NULL.
 *
- * Returns: (transfer full) (nullable): A read #GArrowColumn.
+ * Returns: (transfer full) (nullable): A read #GArrowChunkedArray.
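A minimal end-to-end sketch of the renamed reader API (the file path is hypothetical, error handling abbreviated):

    #include <parquet-glib/parquet-glib.h>

    /* Read the last column of a Parquet file as a chunked array;
     * -1 now selects the last column. */
    static GArrowChunkedArray *
    read_last_column(GError **error)
    {
      GParquetArrowFileReader *reader =
        gparquet_arrow_file_reader_new_path("/tmp/example.parquet", error);
      if (!reader) {
        return NULL;
      }
      GArrowChunkedArray *data =
        gparquet_arrow_file_reader_read_column_data(reader, -1, error);
      g_object_unref(reader);
      return data;
    }
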
* - * Since: 0.12.0 + * Since: 1.0.0 */ -GArrowColumn * -gparquet_arrow_file_reader_read_column(GParquetArrowFileReader *reader, - gint column_index, - GError **error) +GArrowChunkedArray * +gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, + gint i, + GError **error) { + const auto tag = "[parquet][arrow][file-reader][read-column-data]"; auto parquet_arrow_file_reader = gparquet_arrow_file_reader_get_raw(reader); - std::vector indices = {column_index}; - std::shared_ptr arrow_schema; - auto status = parquet_arrow_file_reader->GetSchema(indices, &arrow_schema); - if (!garrow_error_check(error, - status, - "[parquet][arrow][file-reader][read-column][get-schema]")) { + const auto n_columns = + parquet_arrow_file_reader->parquet_reader()->metadata()->num_columns(); + if (!garrow_internal_index_adjust(i, n_columns)) { + garrow_error_check(error, + arrow::Status::IndexError("Out of index: " + "<0..", n_columns, ">: " + "<", i, ">"), + tag); return NULL; } std::shared_ptr arrow_chunked_array; - status = parquet_arrow_file_reader->ReadColumn(column_index, &arrow_chunked_array); - if (!garrow_error_check(error, - status, - "[parquet][arrow][file-reader][read-column]")) { + auto status = + parquet_arrow_file_reader->ReadColumn(i, &arrow_chunked_array); + if (!garrow_error_check(error, status, tag)) { return NULL; } - auto arrow_field = arrow_schema->field(0); - auto arrow_column = std::make_shared(arrow_field, arrow_chunked_array); - return garrow_column_new_raw(&arrow_column); + return garrow_chunked_array_new_raw(&arrow_chunked_array); } /** diff --git a/c_glib/parquet-glib/arrow-file-reader.h b/c_glib/parquet-glib/arrow-file-reader.h index c251dcd0371..a0d1a8eca88 100644 --- a/c_glib/parquet-glib/arrow-file-reader.h +++ b/c_glib/parquet-glib/arrow-file-reader.h @@ -54,10 +54,11 @@ gparquet_arrow_file_reader_select_schema(GParquetArrowFileReader *reader, gsize n_column_indexes, GError **error); -GArrowColumn * -gparquet_arrow_file_reader_read_column(GParquetArrowFileReader *reader, - gint column_index, - GError **error); +GARROW_AVAILABLE_IN_1_0 +GArrowChunkedArray * +gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, + gint i, + GError **error); gint gparquet_arrow_file_reader_get_n_row_groups(GParquetArrowFileReader *reader); diff --git a/c_glib/test/gandiva/test-function-registry.rb b/c_glib/test/gandiva/test-function-registry.rb index 85b03071813..25bac667310 100644 --- a/c_glib/test/gandiva/test-function-registry.rb +++ b/c_glib/test/gandiva/test-function-registry.rb @@ -27,7 +27,7 @@ def setup def test_found native_function = @registry.native_functions[0] assert_equal(native_function, - @registry.lookup(native_function.signature)) + @registry.lookup(native_function.signatures[0])) end def test_not_found diff --git a/c_glib/test/gandiva/test-native-function.rb b/c_glib/test/gandiva/test-native-function.rb index f2546daf4a0..7888f96b678 100644 --- a/c_glib/test/gandiva/test-native-function.rb +++ b/c_glib/test/gandiva/test-native-function.rb @@ -32,15 +32,15 @@ def lookup(name, param_types, return_type) @registry.lookup(signature) end - def test_get_signature - assert_kind_of(Gandiva::FunctionSignature, - @not.signature) + def test_signatures + assert_equal([Gandiva::FunctionSignature], + @not.signatures.collect(&:class).uniq) end sub_test_case("equal") do def test_true assert do - @not == @registry.lookup(@not.signature) + @not == @registry.lookup(@not.signatures[0]) end end @@ -52,8 +52,11 @@ def test_false end def test_to_string - 
assert_equal(@not.signature.to_s, - @not.to_s) + modulo = lookup("modulo", + [int64_data_type, int64_data_type], + int64_data_type) + assert_equal(modulo.signatures.collect(&:to_s).join(", "), + modulo.to_s) end sub_test_case("get_result_nullbale_type") do diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb index 788cffe6b90..f5412a932c1 100644 --- a/c_glib/test/helper/buildable.rb +++ b/c_glib/test/helper/buildable.rb @@ -157,15 +157,15 @@ def append_to_builder(builder, value) end end - def build_table(arrays) - fields = arrays.collect do |name, array| - Arrow::Field.new(name, array.value_data_type) + def build_table(columns) + fields = [] + arrays = [] + columns.each do |name, array| + fields << Arrow::Field.new(name, array.value_data_type) + arrays << array end schema = Arrow::Schema.new(fields) - columns = arrays.collect.with_index do |(_name, array), i| - Arrow::Column.new(fields[i], array) - end - Arrow::Table.new(schema, columns) + Arrow::Table.new(schema, arrays) end def build_record_batch(arrays) diff --git a/c_glib/test/helper/data-type.rb b/c_glib/test/helper/data-type.rb index 5716f7eef5a..b8224409873 100644 --- a/c_glib/test/helper/data-type.rb +++ b/c_glib/test/helper/data-type.rb @@ -48,6 +48,10 @@ def int32_data_type Arrow::Int32DataType.new end + def int64_data_type + Arrow::Int64DataType.new + end + def string_data_type Arrow::StringDataType.new end diff --git a/c_glib/test/parquet/test-arrow-file-reader.rb b/c_glib/test/parquet/test-arrow-file-reader.rb index 96574542a4d..7ff17c2ba11 100644 --- a/c_glib/test/parquet/test-arrow-file-reader.rb +++ b/c_glib/test/parquet/test-arrow-file-reader.rb @@ -53,24 +53,13 @@ def test_select_schema end def test_read_column - a = @reader.read_column(0) assert_equal([ - "a: string", - Arrow::ChunkedArray.new([@a_array]).to_s, + Arrow::ChunkedArray.new([@a_array]), + Arrow::ChunkedArray.new([@b_array]), ], [ - a.field.to_s, - a.data.to_s, - ]) - - b = @reader.read_column(1) - assert_equal([ - "b: int32", - Arrow::ChunkedArray.new([@b_array]).to_s, - ], - [ - b.field.to_s, - b.data.to_s, + @reader.read_column_data(0), + @reader.read_column_data(-1), ]) end end diff --git a/c_glib/test/plasma/test-plasma-created-object.rb b/c_glib/test/plasma/test-plasma-created-object.rb index 9025ff4ac22..857322d20e1 100644 --- a/c_glib/test/plasma/test-plasma-created-object.rb +++ b/c_glib/test/plasma/test-plasma-created-object.rb @@ -45,7 +45,7 @@ def teardown test("#abort") do @object.data.set_data(0, @data) - assert_raise(Arrow::Error::PlasmaObjectExists) do + assert_raise(Arrow::Error::AlreadyExists) do @client.create(@id, @data.bytesize, @options) end @object.abort diff --git a/c_glib/test/test-chunked-array.rb b/c_glib/test/test-chunked-array.rb index 05ea66b561d..82b46968a0d 100644 --- a/c_glib/test/test-chunked-array.rb +++ b/c_glib/test/test-chunked-array.rb @@ -49,13 +49,13 @@ def test_value_type Arrow::ChunkedArray.new(chunks).value_type) end - def test_length + def test_n_rows chunks = [ build_boolean_array([true, false]), build_boolean_array([true]), ] chunked_array = Arrow::ChunkedArray.new(chunks) - assert_equal(3, chunked_array.length) + assert_equal(3, chunked_array.n_rows) end def test_n_nulls diff --git a/c_glib/test/test-column.rb b/c_glib/test/test-column.rb deleted file mode 100644 index 01127de6e02..00000000000 --- a/c_glib/test/test-column.rb +++ /dev/null @@ -1,115 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -class TestColumn < Test::Unit::TestCase - include Helper::Buildable - - sub_test_case(".new") do - def test_array - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array = build_boolean_array([true]) - column = Arrow::Column.new(field, array) - assert_equal(1, column.length) - end - - def test_chunked_array - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - chunks = [ - build_boolean_array([true]), - build_boolean_array([false, true]), - ] - chunked_array = Arrow::ChunkedArray.new(chunks) - column = Arrow::Column.new(field, chunked_array) - assert_equal(3, column.length) - end - end - - def test_equal - field1 = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array1 = build_boolean_array([true, false]) - field2 = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - chunks = [ - build_boolean_array([true]), - build_boolean_array([false]), - ] - array2 = Arrow::ChunkedArray.new(chunks) - assert_equal(Arrow::Column.new(field1, array1), - Arrow::Column.new(field2, array2)) - end - - def test_length - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array = build_boolean_array([true, false]) - column = Arrow::Column.new(field, array) - assert_equal(2, column.length) - end - - def test_n_nulls - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array = build_boolean_array([true, nil, nil]) - column = Arrow::Column.new(field, array) - assert_equal(2, column.n_nulls) - end - - def test_field - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array = build_boolean_array([true]) - column = Arrow::Column.new(field, array) - assert_equal("enabled", column.field.name) - end - - def test_name - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array = build_boolean_array([true]) - column = Arrow::Column.new(field, array) - assert_equal("enabled", column.name) - end - - def test_data_type - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - array = build_boolean_array([true]) - column = Arrow::Column.new(field, array) - assert_equal("bool", column.data_type.to_s) - end - - def test_data - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - chunks = [ - build_boolean_array([true]), - build_boolean_array([false, true]), - ] - chunked_array = Arrow::ChunkedArray.new(chunks) - column = Arrow::Column.new(field, chunked_array) - assert_equal(3, column.data.length) - end - - def test_slice - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - chunks1 = [ - build_boolean_array([true, false, true]), - build_boolean_array([false, true]), - ] - chunks2 = [ - build_boolean_array([false, true]), - build_boolean_array([false]), - ] - chunked_array = Arrow::ChunkedArray.new(chunks1) - column = Arrow::Column.new(field, chunked_array) - sub_column = column.slice(1, 3) - 
assert_equal(chunks2, sub_column.data.chunks) - end -end diff --git a/c_glib/test/test-feather-file-reader.rb b/c_glib/test/test-feather-file-reader.rb index 901b94d2151..48a4fc75488 100644 --- a/c_glib/test/test-feather-file-reader.rb +++ b/c_glib/test/test-feather-file-reader.rb @@ -113,45 +113,31 @@ def setup_file(data) "is_critical" => build_boolean_array([]), } setup_file(:columns => columns) do |reader| + actual_column_names = reader.n_columns.times.collect do |i| + reader.get_column_name(i) + end assert_equal([ "message", "is_critical", ], - [ - reader.get_column_name(0), - reader.get_column_name(1), - ]) - end - end - - test("#get_column") do - columns = { - "message" => build_string_array([]), - "is_critical" => build_boolean_array([]), - } - setup_file(:columns => columns) do |reader| - assert_equal([ - "message", - "is_critical", - ], - [ - reader.get_column(0).name, - reader.get_column(1).name, - ]) + actual_column_names) end end - test("#columns") do + test("#get_column_data") do columns = { - "message" => build_string_array([]), - "is_critical" => build_boolean_array([]), + "message" => build_string_array(["Hello"]), + "is_critical" => build_boolean_array([false]), } setup_file(:columns => columns) do |reader| + actual_columns = reader.n_columns.times.collect do |i| + reader.get_column_data(i).get_chunk(0) + end assert_equal([ - "message", - "is_critical", + columns["message"], + columns["is_critical"], ], - reader.columns.collect(&:name)) + actual_columns) end end diff --git a/c_glib/test/test-feather-file-writer.rb b/c_glib/test/test-feather-file-writer.rb index 91dd1120939..247d937e93e 100644 --- a/c_glib/test/test-feather-file-writer.rb +++ b/c_glib/test/test-feather-file-writer.rb @@ -40,27 +40,31 @@ def test_append input = Arrow::MemoryMappedInputStream.new(tempfile.path) begin reader = Arrow::FeatherFileReader.new(input) - assert_equal([true, "Log"], - [reader.has_description?, reader.description]) - column_values = {} - reader.columns.each do |column| - values = [] - column.data.chunks.each do |array| - array.length.times do |j| - if array.respond_to?(:get_string) - values << array.get_string(j) - else - values << array.get_value(j) - end - end - end - column_values[column.name] = values + columns = reader.n_columns.times.collect do |i| + [ + reader.get_column_name(i), + reader.get_column_data(i).get_chunk(0), + ] end - assert_equal({ - "message" => ["Crash", "Error", "Shutdown"], - "is_critical" => [true, true, false], - }, - column_values) + assert_equal([ + true, + "Log", + [ + [ + "message", + build_string_array(["Crash", "Error", "Shutdown"]), + ], + [ + "is_critical", + build_boolean_array([true, true, false]), + ], + ], + ], + [ + reader.has_description?, + reader.description, + columns, + ]) ensure input.close end diff --git a/c_glib/test/test-file-writer.rb b/c_glib/test/test-file-writer.rb index 67aed85f73b..5f9c3c4e19a 100644 --- a/c_glib/test/test-file-writer.rb +++ b/c_glib/test/test-file-writer.rb @@ -60,12 +60,11 @@ def test_write_table array = build_boolean_array([true, false, true]) field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) schema = Arrow::Schema.new([field]) - column = Arrow::Column.new(field, array) begin file_writer = Arrow::RecordBatchFileWriter.new(output, schema) begin - table = Arrow::Table.new(schema, [column]) + table = Arrow::Table.new(schema, [array]) file_writer.write_table(table) ensure file_writer.close diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb index 23078d784ff..c9ac75000bc 
100644 --- a/c_glib/test/test-record-batch.rb +++ b/c_glib/test/test-record-batch.rb @@ -87,31 +87,26 @@ def test_schema @record_batch.schema.fields.collect(&:name)) end - sub_test_case("#column") do + sub_test_case("#column_data") do def test_positive assert_equal(build_boolean_array(@valid_values), - @record_batch.get_column(1)) + @record_batch.get_column_data(1)) end def test_negative assert_equal(build_boolean_array(@visible_values), - @record_batch.get_column(-2)) + @record_batch.get_column_data(-2)) end def test_positive_out_of_index - assert_nil(@record_batch.get_column(2)) + assert_nil(@record_batch.get_column_data(2)) end def test_negative_out_of_index - assert_nil(@record_batch.get_column(-3)) + assert_nil(@record_batch.get_column_data(-3)) end end - def test_columns - assert_equal([5, 5], - @record_batch.columns.collect(&:length)) - end - def test_n_columns assert_equal(2, @record_batch.n_columns) end @@ -123,7 +118,7 @@ def test_n_rows def test_slice sub_record_batch = @record_batch.slice(3, 2) sub_visible_values = sub_record_batch.n_rows.times.collect do |i| - sub_record_batch.get_column(0).get_value(i) + sub_record_batch.get_column_data(0).get_value(i) end assert_equal([false, true], sub_visible_values) diff --git a/c_glib/test/test-schema.rb b/c_glib/test/test-schema.rb index 4710cfb149d..6ff5514afdc 100644 --- a/c_glib/test/test-schema.rb +++ b/c_glib/test/test-schema.rb @@ -47,6 +47,20 @@ def test_not_found end end + sub_test_case("#get_field_index") do + def test_found + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_equal(0, schema.get_field_index("enabled")) + end + + def test_not_found + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + assert_equal(-1, schema.get_field_index("nonexistent")) + end + end + def test_n_fields fields = [ Arrow::Field.new("enabled", Arrow::BooleanDataType.new), diff --git a/c_glib/test/test-table.rb b/c_glib/test/test-table.rb index 54ba7392dae..9e46a4ba78a 100644 --- a/c_glib/test/test-table.rb +++ b/c_glib/test/test-table.rb @@ -30,26 +30,28 @@ def setup def dump_table(table) table.n_columns.times.collect do |i| - column = table.get_column(i) + field = table.schema.get_field(i) + chunked_array = table.get_column_data(i) values = [] - column.data.chunks.each do |chunk| + chunked_array.chunks.each do |chunk| chunk.length.times do |j| values << chunk.get_value(j) end end [ - column.name, + field.name, values, ] end end - def test_columns - columns = [ - Arrow::Column.new(@fields[0], build_boolean_array([true])), - Arrow::Column.new(@fields[1], build_boolean_array([false])), + def test_arrays + require_gi_bindings(3, 3, 1) + arrays = [ + build_boolean_array([true]), + build_boolean_array([false]), ] - table = Arrow::Table.new(@schema, columns) + table = Arrow::Table.new(@schema, arrays) assert_equal([ ["visible", [true]], ["valid", [false]], @@ -57,16 +59,18 @@ def test_columns dump_table(table)) end - def test_arrays + def test_chunked_arrays require_gi_bindings(3, 3, 1) arrays = [ - build_boolean_array([true]), - build_boolean_array([false]), + Arrow::ChunkedArray.new([build_boolean_array([true]), + build_boolean_array([false])]), + Arrow::ChunkedArray.new([build_boolean_array([false]), + build_boolean_array([true])]), ] table = Arrow::Table.new(@schema, arrays) assert_equal([ - ["visible", [true]], - ["valid", [false]], + ["visible", [true, false]], + ["valid", [false, true]], ], dump_table(table)) end @@ -101,8 +105,8 @@ 
def setup ] schema = Arrow::Schema.new(fields) columns = [ - Arrow::Column.new(fields[0], build_boolean_array([true])), - Arrow::Column.new(fields[1], build_boolean_array([false])), + build_boolean_array([true]), + build_boolean_array([false]), ] @table = Arrow::Table.new(schema, columns) end @@ -114,8 +118,8 @@ def test_equal ] schema = Arrow::Schema.new(fields) columns = [ - Arrow::Column.new(fields[0], build_boolean_array([true])), - Arrow::Column.new(fields[1], build_boolean_array([false])), + build_boolean_array([true]), + build_boolean_array([false]), ] other_table = Arrow::Table.new(schema, columns) assert_equal(@table, other_table) @@ -126,8 +130,15 @@ def test_schema @table.schema.fields.collect(&:name)) end - def test_column - assert_equal("valid", @table.get_column(1).name) + def test_column_data + assert_equal([ + Arrow::ChunkedArray.new([build_boolean_array([true])]), + Arrow::ChunkedArray.new([build_boolean_array([false])]), + ], + [ + @table.get_column_data(0), + @table.get_column_data(-1), + ]) end def test_n_columns @@ -140,8 +151,8 @@ def test_n_rows def test_add_column field = Arrow::Field.new("added", Arrow::BooleanDataType.new) - column = Arrow::Column.new(field, build_boolean_array([true])) - new_table = @table.add_column(1, column) + chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])]) + new_table = @table.add_column(1, field, chunked_array) assert_equal(["visible", "added", "valid"], new_table.schema.fields.collect(&:name)) end @@ -154,8 +165,8 @@ def test_remove_column def test_replace_column field = Arrow::Field.new("added", Arrow::BooleanDataType.new) - column = Arrow::Column.new(field, build_boolean_array([true])) - new_table = @table.replace_column(0, column) + chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true])]) + new_table = @table.replace_column(0, field, chunked_array) assert_equal(["added", "valid"], new_table.schema.fields.collect(&:name)) end diff --git a/ci/PKGBUILD b/ci/PKGBUILD index 1d20bb7284d..a0b94784d4c 100644 --- a/ci/PKGBUILD +++ b/ci/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=0.13.0.9000 +pkgver=0.14.1.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") @@ -32,32 +32,33 @@ depends=("${MINGW_PACKAGE_PREFIX}-boost" makedepends=("${MINGW_PACKAGE_PREFIX}-cmake" "${MINGW_PACKAGE_PREFIX}-gcc") options=("staticlibs" "strip" "!buildflags") -source_dir=apache-${_realname}-${pkgver} + +# For installing from a local checkout, set source_dir to . 
and don't include +# a "source" param below +source_dir="$APPVEYOR_BUILD_FOLDER" +# else +# source_dir=apache-${_realname}-${pkgver} # For released version: #source=("https://archive.apache.org/dist/arrow/arrow-${pkgver}/apache-arrow-${pkgver}.tar.gz") #sha256sums=("ac2a77dd9168e9892e432c474611e86ded0be6dfe15f689c948751d37f81391a") # For github dev version: # Append `#commit=54b1b2f688e5e84b4c664b1e12a95f93b94ab2f3` to the URL to select a revision -source=("${source_dir}"::"git+https://github.com/apache/arrow") -sha256sums=("SKIP") +# source=("${source_dir}"::"git+https://github.com/apache/arrow") +# sha256sums=("SKIP") +# source_dir="${APPVEYOR_BUILD_FOLDER}/${source_dir}" cmake_build_type=release cpp_build_dir=build-${CARCH}-cpp pkgver() { - cd "$source_dir" - grep Version r/DESCRIPTION | cut -d " " -f 2 -} - -prepare() { - pushd ${source_dir} - #patch -p1 -N -i ${srcdir}/3923.patch - popd + # The only purpose of this here is to cause the job to error if the + # version in pkgver is different from what is in r/DESCRIPTION + grep Version "${source_dir}/r/DESCRIPTION" | cut -d " " -f 2 } build() { - ARROW_CPP_DIR="$(pwd)/${source_dir}/cpp" + ARROW_CPP_DIR="${source_dir}/cpp" [[ -d ${cpp_build_dir} ]] && rm -rf ${cpp_build_dir} mkdir -p ${cpp_build_dir} pushd ${cpp_build_dir} @@ -102,12 +103,6 @@ build() { popd } -check() { - # TODO - # make -C ${cpp_build_dir} test - : -} - package() { make -C ${cpp_build_dir} DESTDIR="${pkgdir}" install diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml index e34d2bf299d..fd21ed8d3fa 100644 --- a/ci/conda_env_cpp.yml +++ b/ci/conda_env_cpp.yml @@ -36,5 +36,6 @@ python rapidjson snappy thrift-cpp>=0.11.0 +uriparser zlib zstd diff --git a/ci/conda_env_r.yml b/ci/conda_env_r.yml new file mode 100644 index 00000000000..0eff8a39909 --- /dev/null +++ b/ci/conda_env_r.yml @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +r-assertthat +r-base +r-bit64 +r-fs +r-purrr +r-r6 +r-rcpp >=0.12.18.2 +r-rlang +r-tidyselect +# Test dependencies +pandoc +r-covr +r-hms +r-lubridate +r-pkgdown +r-rmarkdown +r-roxygen2 +r-testthat +r-tibble diff --git a/ci/docker_build_and_fuzzit.sh b/ci/docker_build_and_fuzzit.sh new file mode 100755 index 00000000000..3ab3145c3c3 --- /dev/null +++ b/ci/docker_build_and_fuzzit.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +export ARROW_FUZZING="ON" +#export ARROW_DEPENDENCY_SOURCE="BUNDLED" +export ARROW_USE_ASAN="ON" +export CC="clang-7" +export CXX="clang++-7" +export ARROW_BUILD_TYPE="RelWithDebInfo" +export ARROW_ORC="OFF" +export ARROW_PARQUET="OFF" +export ARROW_PLASMA="OFF" +export ARROW_FLIGHT="OFF" +export ARROW_BUILD_BENCHMARKS="OFF" +export ARROW_WITH_BZ2="OFF" +export ARROW_WITH_ZSTD="OFF" +export ARROW_BUILD_UTILITIES="OFF" +/arrow/ci/docker_build_cpp.sh || exit 1 +pushd /build/cpp + +mkdir ./relwithdebinfo/out +cp ./relwithdebinfo/arrow-ipc-fuzzing-test ./relwithdebinfo/out/fuzzer +ldd ./relwithdebinfo/arrow-ipc-fuzzing-test | grep "=> /" | awk '{print $3}' | xargs -I '{}' cp -v '{}' ./relwithdebinfo/out/. +cd ./relwithdebinfo/out/ +tar -czvf fuzzer.tar.gz * +cd ../../ + +export TARGET_ID=u79f6bXYgNH4NkU99iWK +export FUZZIT_API_KEY=${FUZZIT_API_KEY:-ac6089a1bc2313679f2d99bb80553162c380676bff3f094de826b16229e28184a8084b86f52c95112bde6b3dbb07b9b7} +wget -O fuzzit https://bin.fuzzit.dev/fuzzit-1.1 +chmod a+x fuzzit +./fuzzit auth $FUZZIT_API_KEY +./fuzzit create job --type fuzzing --host bionic-llvm7 --revision $CIRCLE_SHA1 --branch $CIRCLE_BRANCH $TARGET_ID ./relwithdebinfo/out/fuzzer.tar.gz diff --git a/ci/docker_build_cpp.sh b/ci/docker_build_cpp.sh index 471a73202e3..307b4cdc14a 100755 --- a/ci/docker_build_cpp.sh +++ b/ci/docker_build_cpp.sh @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. 
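# For reference, a minimal sketch of the ldd-based bundling used by the fuzzit
# script above: copy a binary plus every shared library it links against into
# one directory so the resulting tarball is self-contained. The function name
# and paths here are illustrative, not part of the CI scripts.
#
#   bundle_with_libs() {
#     local exe="$1" out="$2"
#     mkdir -p "$out"
#     cp "$exe" "$out/"
#     # ldd prints "libfoo.so => /path/to/libfoo.so (0x...)"; keep the resolved path
#     ldd "$exe" | grep "=> /" | awk '{print $3}' | xargs -I '{}' cp -v '{}' "$out/"
#     tar -C "$out" -czf "$out.tar.gz" .
#   }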
-set -e
+set -ex

source_dir=${1:-/arrow/cpp}
build_dir=${2:-/build/cpp}
@@ -29,14 +29,22 @@ mkdir -p ${build_dir}
pushd ${build_dir}

cmake -GNinja \
-      -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-      -DARROW_VERBOSE_THIRDPARTY_BUILD=ON \
-      -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
      -DCMAKE_INSTALL_PREFIX=${install_dir} \
      -DCMAKE_INSTALL_LIBDIR=lib \
-      -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-ON} \
-      -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-ON} \
+      -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
+      -DCMAKE_CXX_FLAGS=${CXXFLAGS:-} \
+      -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
+      -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-ON} \
+      -DARROW_BUILD_WARNING_LEVEL=${ARROW_BUILD_WARNING_LEVEL:-CHECKIN} \
+      -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-ON} \
      -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-ON} \
+      -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
+      -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-ON} \
+      -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
+      -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
+      -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
+      -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
+      -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
      -DARROW_FLIGHT=${ARROW_FLIGHT:-ON} \
      -DARROW_ORC=${ARROW_ORC:-ON} \
      -DARROW_PLASMA=${ARROW_PLASMA:-ON} \
@@ -45,19 +53,18 @@ cmake -GNinja \
      -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \
      -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
      -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \
-      -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
-      -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
+      -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-ON} \
+      -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-ON} \
      -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
      -DARROW_EXTRA_ERROR_CONTEXT=ON \
-      -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
-      -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
      -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
      -DPARQUET_REQUIRE_ENCRYPTION=${ARROW_WITH_OPENSSL:-ON} \
-      -DCMAKE_CXX_FLAGS=$CXXFLAGS \
      -DBOOST_SOURCE=${ARROW_BOOST_SOURCE:-AUTO} \
      -Duriparser_SOURCE=AUTO \
+      -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
      ${CMAKE_ARGS} \
      ${source_dir}
+
ninja
ninja install
diff --git a/ci/docker_build_java.sh b/ci/docker_build_java.sh
index f3dd3f1446b..e6516b77831 100755
--- a/ci/docker_build_java.sh
+++ b/ci/docker_build_java.sh
@@ -25,10 +25,23 @@
mkdir -p /build/java
arrow_src=/build/java/arrow
+# Remove any pre-existing artifacts
+rm -rf $arrow_src
+
pushd /arrow
-  rsync -a header java format integration $arrow_src
+rsync -a header java format integration $arrow_src
popd
+JAVA_ARGS=
+if [ "$ARROW_JAVA_RUN_TESTS" != "1" ]; then
+  JAVA_ARGS=-DskipTests
+fi
+
pushd $arrow_src/java
-  mvn -B -DskipTests -Drat.skip=true install
+mvn -B $JAVA_ARGS -Drat.skip=true install
+
+if [ "$ARROW_JAVADOC" == "1" ]; then
+  export MAVEN_OPTS="$MAVEN_OPTS -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+  mvn -B site
+fi
popd
diff --git a/ci/docker_java_test_all.sh b/ci/docker_java_test_all.sh
new file mode 100755
index 00000000000..1466907d9c4
--- /dev/null
+++ b/ci/docker_java_test_all.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +export ARROW_TEST_DATA=/arrow/testing/data + +export ARROW_JAVA_RUN_TESTS=1 + +export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 +export ARROW_JAVADOC=1 +bash $SOURCE_DIR/docker_build_java.sh + +export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64 +export ARROW_JAVADOC=0 +bash $SOURCE_DIR/docker_build_java.sh diff --git a/ci/rust-build-main.bat b/ci/rust-build-main.bat index 5bf1c843928..e7f3c32a549 100644 --- a/ci/rust-build-main.bat +++ b/ci/rust-build-main.bat @@ -29,7 +29,7 @@ pushd rust rustup default nightly rustup show -cargo build --target %TARGET% --release || exit /B +cargo build --target %TARGET% --all-targets --release || exit /B @echo @echo Test (release) @echo -------------- diff --git a/ci/travis/after-failure.sh b/ci/travis/after-failure.sh new file mode 100755 index 00000000000..9e77ec3eee3 --- /dev/null +++ b/ci/travis/after-failure.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +if [[ "${TRAVIS_OS_NAME}" = "osx" ]]; then + COREFILE=$(find /cores -maxdepth 1 -type f -name "core.*" | head -n 1) + if [[ -f "$COREFILE" ]]; then + lldb -c "$COREFILE" --batch --one-line "thread backtrace all -e true" + fi + ls -la ~/Library/Logs/DiagnosticReports/ + cat ~/Library/Logs/DiagnosticReports/*.crash +else + ls -fd1 /tmp/core.* + COREFILE=$(ls -fd1 /tmp/core.* | head -n 1) + if [[ -f "$COREFILE" ]]; then + gdb -c "$COREFILE" $TRAVIS_BUILD_DIR/current-exe -ex "thread apply all bt" -ex "set pagination 0" -batch + fi +fi + +set +ex diff --git a/ci/travis_script_javadoc.sh b/ci/travis/after-script.sh similarity index 77% rename from ci/travis_script_javadoc.sh rename to ci/travis/after-script.sh index 755d4628f20..f323f3cf20f 100755 --- a/ci/travis_script_javadoc.sh +++ b/ci/travis/after-script.sh @@ -1,5 +1,4 @@ -#!/usr/bin/env bash - +#!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,15 +16,10 @@ # specific language governing permissions and limitations # under the License. 
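# For reference, the core-dump round trip these Travis scripts implement, with
# illustrative paths: before-install.sh (below) raises the core limit and
# points the kernel's core_pattern at /tmp, and after-failure.sh (above) feeds
# the newest core file plus the crashing executable to gdb for a backtrace.
#
#   ulimit -c unlimited -S                          # allow cores to be written
#   echo '/tmp/core.%p.%E' | sudo tee /proc/sys/kernel/core_pattern
#   ./some-crashing-program || true
#   COREFILE=$(ls -t /tmp/core.* | head -n 1)
#   gdb -c "$COREFILE" ./some-crashing-program \
#       -ex "thread apply all bt" -ex "set pagination 0" -batch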
-set -e
-
-source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
-
-JAVA_DIR=${TRAVIS_BUILD_DIR}/java
-
-pushd $JAVA_DIR
+set -ex
-export MAVEN_OPTS="$MAVEN_OPTS -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
-$TRAVIS_MVN -B site
+if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then
+  ccache --show-stats
+fi
-popd
+set +ex
diff --git a/ci/travis/before-install.sh b/ci/travis/before-install.sh
new file mode 100755
index 00000000000..fd97b469341
--- /dev/null
+++ b/ci/travis/before-install.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+eval "${MATRIX_EVAL}"
+
+# Enable core files
+ulimit -c unlimited -S
+
+if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then
+  # Remove apport's core_pattern
+  sudo bash -c "echo '/tmp/core.%p.%E' > /proc/sys/kernel/core_pattern"
+
+  echo 'Acquire::Retries 10; Acquire::http::Timeout "20";' | \
+    sudo tee /etc/apt/apt.conf.d/99-travis-retry
+  sudo apt-get update -qq
+  ccache --show-stats
+fi
+
+eval "$(python ${TRAVIS_BUILD_DIR}/ci/detect-changes.py)"
+
+set +ex
diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index 12b89738e54..3c261534aba 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -17,6 +17,17 @@
# specific language governing permissions and limitations
# under the License.
+# NB(wesm): Here is the Travis CI entry removed in ARROW-5962 + +# - name: "[manylinux1] Python" +# language: cpp +# env: +# - PYTHON_VERSIONS="3.6,16 3.7,16" +# before_script: +# - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then docker-compose pull python-manylinux1; fi +# script: +# - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then $TRAVIS_BUILD_DIR/ci/travis_script_manylinux.sh; fi + set -ex # Testing for https://issues.apache.org/jira/browse/ARROW-2657 diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 8f507298fd4..202c24f0a58 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -192,11 +192,14 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then sudo bash -c "echo 2048 > /proc/sys/vm/nr_hugepages" fi +# For core dump analysis +ln -sf `which python` $TRAVIS_BUILD_DIR/current-exe + # Need to run tests from the source tree for Cython coverage and conftest.py if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then # Output Python coverage data in a persistent place export COVERAGE_FILE=$ARROW_PYTHON_COVERAGE_FILE - coverage run --append -m pytest $PYARROW_PYTEST_FLAGS pyarrow/tests + python -m coverage run --append -m pytest $PYARROW_PYTEST_FLAGS pyarrow/tests else python -m pytest $PYARROW_PYTEST_FLAGS pyarrow/tests fi diff --git a/ci/travis_script_rust.sh b/ci/travis_script_rust.sh index c25d64ec42c..704cb37bb06 100755 --- a/ci/travis_script_rust.sh +++ b/ci/travis_script_rust.sh @@ -31,7 +31,7 @@ rustup show # raises on any formatting errors cargo +stable fmt --all -- --check -RUSTFLAGS="-D warnings" cargo build +RUSTFLAGS="-D warnings" cargo build --all-targets cargo test # run examples diff --git a/cpp/Brewfile b/cpp/Brewfile index 4b796db9e1b..c6588873d32 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -35,7 +35,6 @@ brew "python" brew "rapidjson" brew "re2" brew "snappy" -brew "openssl" brew "thrift" brew "wget" brew "zstd" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1c706cd46df..93b5f0199c1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -18,7 +18,7 @@ cmake_minimum_required(VERSION 3.2) message(STATUS "Building using CMake version: ${CMAKE_VERSION}") -set(ARROW_VERSION "0.14.0") +set(ARROW_VERSION "1.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") @@ -39,17 +39,20 @@ if(ARROW_VERSION_MAJOR STREQUAL "" endif() # The SO version is also the ABI version -if(arrow_VERSION_MAJOR STREQUAL "0") +if(ARROW_VERSION_MAJOR STREQUAL "0") # Arrow 0.x.y => SO version is "x", full SO version is "x.y.0" set(ARROW_SO_VERSION "${ARROW_VERSION_MINOR}") set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0") else() - message(FATAL_ERROR "Need to implement SO version generation for Arrow 1.0+") + # Arrow 1.x.y => SO version is "10x", full SO version is "10x.y.0" + math(EXPR ARROW_SO_VERSION "${ARROW_VERSION_MAJOR} * 100 + ${ARROW_VERSION_MINOR}") + set(ARROW_FULL_SO_VERSION "${ARROW_SO_VERSION}.${ARROW_VERSION_PATCH}.0") endif() message(STATUS "Arrow version: " "${ARROW_VERSION_MAJOR}.${ARROW_VERSION_MINOR}.${ARROW_VERSION_PATCH} " "(full: '${ARROW_VERSION}')") +message(STATUS "Arrow SO version: ${ARROW_SO_VERSION} (full: ${ARROW_FULL_SO_VERSION})") set(ARROW_SOURCE_DIR ${PROJECT_SOURCE_DIR}) set(ARROW_BINARY_DIR ${PROJECT_BINARY_DIR}) @@ -70,6 +73,11 @@ if(POLICY CMP0054) cmake_policy(SET CMP0054 NEW) endif() +if(POLICY CMP0068) + # https://cmake.org/cmake/help/v3.9/policy/CMP0068.html + cmake_policy(SET CMP0068 NEW) +endif() + # don't ignore _ROOT variables in find_package 
if(POLICY CMP0074)
  # https://cmake.org/cmake/help/v3.12/policy/CMP0074.html
diff --git a/cpp/Dockerfile.ubuntu-bionic b/cpp/Dockerfile.ubuntu-bionic
index e2fd92d1a41..763e13e50c8 100644
--- a/cpp/Dockerfile.ubuntu-bionic
+++ b/cpp/Dockerfile.ubuntu-bionic
@@ -17,78 +17,102 @@
FROM ubuntu:bionic

+# pipefail is enabled for proper error detection in the `wget | apt-key add`
+# step
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

-ENV DEBIAN_FRONTEND noninteractive
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update -y -q && \
+    apt-get install -y -q --no-install-recommends \
+      wget software-properties-common gpg-agent && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Installs the LLVM toolchain, for Gandiva and for testing other compilers
+#
+# Note that this is installed before the base packages so that iterating on
+# the package list below with `docker build` stays fast: the LLVM apt mirror
+# is much slower than the Ubuntu mirrors.
+ARG LLVM_VERSION=7
+# Args are only exposed in the "build" step; this ensures that LLVM_VERSION is
+# found in the "run" step.
+ENV LLVM_VERSION=${LLVM_VERSION}
+ARG LLVM_APT_URL="http://apt.llvm.org/bionic/"
+ARG LLVM_APT_ARCH="llvm-toolchain-bionic-${LLVM_VERSION}"
+RUN wget -q -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+    apt-add-repository -y --update "deb ${LLVM_APT_URL} ${LLVM_APT_ARCH} main" && \
+    apt-get install -y -q --no-install-recommends \
+      clang-${LLVM_VERSION} \
+      clang-format-${LLVM_VERSION} \
+      clang-tidy-${LLVM_VERSION} \
+      llvm-${LLVM_VERSION}-dev && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Installs C++ toolchain and dependencies
RUN apt-get update -y -q && \
-    apt-get install -y -q --no-install-recommends wget software-properties-common gpg-agent && \
-    wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
-    apt-add-repository -y "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" && \
-    apt-get update -y -q && \
    apt-get install -y -q --no-install-recommends \
-      autoconf \
-      bison \
-      ca-certificates \
-      ccache \
-      clang-7 \
-      cmake \
-      flex \
-      g++ \
-      gcc \
-      git \
-      libbenchmark-dev \
-      libboost-all-dev \
-      libbrotli-dev \
-      libbz2-dev \
-      libc-ares-dev \
-      libdouble-conversion-dev \
-      libgflags-dev \
-      libgoogle-glog-dev \
-      libgrpc-dev \
-      libgrpc++-dev \
-      liblz4-dev \
-      libprotoc-dev \
-      libprotobuf-dev \
-      libre2-dev \
-      libsnappy-dev \
-      libssl-dev \
-      libzstd-dev \
-      llvm-7-dev \
-      make \
-      ninja-build \
-      pkg-config \
-      protobuf-compiler \
-      protobuf-compiler-grpc \
-      rapidjson-dev \
-      tzdata \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+      autoconf \
+      bison \
+      ca-certificates \
+      ccache \
+      cmake \
+      flex \
+      g++ \
+      gcc \
+      git \
+      libbenchmark-dev \
+      libboost-filesystem-dev \
+      libboost-regex-dev \
+      libboost-system-dev \
+      libbrotli-dev \
+      libbz2-dev \
+      libdouble-conversion-dev \
+      libgflags-dev \
+      libgoogle-glog-dev \
+      liblz4-dev \
+      liblzma-dev \
+      libre2-dev \
+      libsnappy-dev \
+      libssl-dev \
+      libzstd-dev \
+      ninja-build \
+      pkg-config \
+      rapidjson-dev \
+      thrift-compiler \
+      tzdata && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*

-# Ubuntu's gtest just provides sources, the compiled version is only available
-# from Ubuntu Cosmic on.
-# ARROW_GANDIVA_JAVA requires CMake 3.11
-# TODO: gRPC is too old on Bionic and c-ares CMake config is not installed thus
-# we need to build both from source.
-# protobuf does not come with PHP but grpc needs it to built, thus also
-# built Protobuf from source: https://github.com/grpc/grpc/issues/15949
-ENV CC=gcc \
-    CXX=g++ \
-    ARROW_BUILD_BENCHMARKS=ON \
-    ARROW_BUILD_TESTS=ON \
-    ARROW_DEPENDENCY_SOURCE=SYSTEM \
-    ARROW_FLIGHT=ON \
-    ARROW_GANDIVA=ON \
-    ARROW_GANDIVA_JAVA=OFF \
-    ARROW_PARQUET=ON \
-    ARROW_HOME=/usr \
-    ARROW_WITH_ZSTD=ON \
-    CMAKE_ARGS="-DThrift_SOURCE=BUNDLED \
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffers is not packaged
+# - libgtest-dev only provides sources
+# - libprotobuf-dev only provides sources
+# - thrift is too old
+ENV CMAKE_ARGS="-DThrift_SOURCE=BUNDLED \
-DFlatbuffers_SOURCE=BUNDLED \
-DGTest_SOURCE=BUNDLED \
--DgRPC_SOURCE=BUNDLED \
--Dc-ares_SOURCE=BUNDLED \
-DORC_SOURCE=BUNDLED \
--DProtobuf_SOURCE=BUNDLED"
+-Dc-ares_SOURCE=BUNDLED \
+-DgRPC_SOURCE=BUNDLED \
+-DProtobuf_SOURCE=BUNDLED ${CMAKE_ARGS}"
+
+# Prioritize system packages and local installation
+ENV ARROW_DEPENDENCY_SOURCE=SYSTEM \
+    ARROW_FLIGHT=ON \
+    ARROW_GANDIVA=ON \
+    ARROW_HDFS=ON \
+    ARROW_ORC=ON \
+    ARROW_PARQUET=ON \
+    ARROW_PLASMA=ON \
+    ARROW_USE_ASAN=ON \
+    ARROW_USE_UBSAN=ON \
+    ARROW_NO_DEPRECATED_API=ON \
+    ARROW_INSTALL_NAME_RPATH=OFF \
+    ARROW_WITH_BZ2=ON \
+    ARROW_WITH_ZSTD=ON
+
+ENV CC=clang-${LLVM_VERSION} \
+    CXX=clang++-${LLVM_VERSION}

-# build and test
CMD ["arrow/ci/docker_build_and_test_cpp.sh"]
diff --git a/cpp/build-support/get_apache_mirror.py b/cpp/build-support/get_apache_mirror.py
index 38ea6f4d294..ac55abad451 100755
--- a/cpp/build-support/get_apache_mirror.py
+++ b/cpp/build-support/get_apache_mirror.py
@@ -20,6 +20,8 @@
# mirror for downloading dependencies, e.g. in CMake

import json
+import warnings
+
try:
    import requests
@@ -35,6 +37,14 @@ def get_url(url):
def get_url(url):
    return urlopen(url).read()

-suggested_mirror = get_url('https://www.apache.org/dyn/'
-                           'closer.cgi?as_json=1')
-print(json.loads(suggested_mirror.decode('utf-8'))['preferred'])
+url = 'https://www.apache.org/dyn/closer.cgi?as_json=1'
+
+try:
+    suggested_mirror = get_url(url)
+except Exception as e:
+    warnings.warn("Failed loading {url!r}: {e}".format(**locals()),
+                  RuntimeWarning)
+    # Well-known mirror, in case the URL above fails to load
+    print("http://apache.osuosl.org/")
+else:
+    print(json.loads(suggested_mirror.decode('utf-8'))['preferred'])
diff --git a/cpp/cmake_modules/Findc-aresAlt.cmake b/cpp/cmake_modules/Findc-aresAlt.cmake
deleted file mode 100644
index a769fb533ee..00000000000
--- a/cpp/cmake_modules/Findc-aresAlt.cmake
+++ /dev/null
@@ -1,41 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
- -if(c-ares_ROOT) - find_library(CARES_LIB - NAMES cares - PATHS ${c-ares_ROOT} - PATH_SUFFIXES ${LIB_PATH_SUFFIXES} - NO_DEFAULT_PATH) - find_path(CARES_INCLUDE_DIR - NAMES ares.h - PATHS ${c-ares_ROOT} - NO_DEFAULT_PATH - PATH_SUFFIXES ${INCLUDE_PATH_SUFFIXES}) -else() - find_library(CARES_LIB NAMES cares PATH_SUFFIXES ${LIB_PATH_SUFFIXES}) - find_path(CARES_INCLUDE_DIR NAMES ares.h PATH_SUFFIXES ${INCLUDE_PATH_SUFFIXES}) -endif() - -find_package_handle_standard_args(c-aresAlt REQUIRED_VARS CARES_INCLUDE_DIR CARES_LIB) - -if(c-aresAlt_FOUND) - add_library(c-ares::cares UNKNOWN IMPORTED) - set_target_properties(c-ares::cares - PROPERTIES IMPORTED_LOCATION "${CARES_LIB}" - INTERFACE_INCLUDE_DIRECTORIES "${CARES_INCLUDE_DIR}") -endif() diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 496904b5233..9eba9e8cfd1 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -24,6 +24,7 @@ check_cxx_compiler_flag("-msse4.2" CXX_SUPPORTS_SSE4_2) check_cxx_compiler_flag("-maltivec" CXX_SUPPORTS_ALTIVEC) # Arm64 compiler flags check_cxx_compiler_flag("-march=armv8-a+crc" CXX_SUPPORTS_ARMCRC) +check_cxx_compiler_flag("-march=armv8-a+crc+crypto" CXX_SUPPORTS_ARMV8_CRC_CRYPTO) # Support C11 set(CMAKE_C_STANDARD 11) @@ -265,7 +266,11 @@ if(CXX_SUPPORTS_ALTIVEC AND ARROW_ALTIVEC) endif() if(CXX_SUPPORTS_ARMCRC) - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc") + if(CXX_SUPPORTS_ARMV8_CRC_CRYPTO) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc+crypto") + else() + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc") + endif() endif() if(ARROW_USE_SIMD) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index f6677e0165e..15c8b6e0765 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -40,11 +40,10 @@ set(APACHE_MIRROR "") macro(get_apache_mirror) if(APACHE_MIRROR STREQUAL "") - exec_program(${PYTHON_EXECUTABLE} - ARGS - ${CMAKE_SOURCE_DIR}/build-support/get_apache_mirror.py - OUTPUT_VARIABLE - APACHE_MIRROR) + execute_process(COMMAND ${PYTHON_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/build-support/get_apache_mirror.py + OUTPUT_VARIABLE APACHE_MIRROR + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() endmacro() @@ -94,10 +93,6 @@ if(ARROW_DEPENDENCY_SOURCE STREQUAL "CONDA") endif() set(ARROW_ACTUAL_DEPENDENCY_SOURCE "SYSTEM") message(STATUS "Using CONDA_PREFIX for ARROW_PACKAGE_PREFIX: ${ARROW_PACKAGE_PREFIX}") - # ARROW-5564: Remove this when uriparser gets a conda package - if("${uriparser_SOURCE}" STREQUAL "") - set(uriparser_SOURCE "AUTO") - endif() else() set(ARROW_ACTUAL_DEPENDENCY_SOURCE "${ARROW_DEPENDENCY_SOURCE}") endif() @@ -184,6 +179,19 @@ macro(resolve_dependency DEPENDENCY_NAME) endif() endmacro() +macro(resolve_dependency_with_version DEPENDENCY_NAME REQUIRED_VERSION) + if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") + find_package(${DEPENDENCY_NAME} ${REQUIRED_VERSION} MODULE) + if(NOT ${${DEPENDENCY_NAME}_FOUND}) + build_dependency(${DEPENDENCY_NAME}) + endif() + elseif(${DEPENDENCY_NAME}_SOURCE STREQUAL "BUNDLED") + build_dependency(${DEPENDENCY_NAME}) + elseif(${DEPENDENCY_NAME}_SOURCE STREQUAL "SYSTEM") + find_package(${DEPENDENCY_NAME} ${REQUIRED_VERSION} REQUIRED) + endif() +endmacro() + # ---------------------------------------------------------------------- # Thirdparty versions, environment variables, source URLs @@ -1286,7 +1294,12 @@ macro(build_protobuf) endmacro() 
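# For reference, a minimal sketch of how the resolve_dependency_with_version
# macro defined above behaves; "Foo" and the version are illustrative:
#
#   set(Foo_SOURCE "AUTO")                    # or "BUNDLED" / "SYSTEM"
#   resolve_dependency_with_version(Foo "1.2.0")
#   # AUTO:    try find_package(Foo 1.2.0 MODULE), else build_dependency(Foo)
#   # BUNDLED: always build_dependency(Foo)
#   # SYSTEM:  find_package(Foo 1.2.0 REQUIRED); configuration fails if absent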
if(ARROW_WITH_PROTOBUF) - resolve_dependency(Protobuf) + if(ARROW_WITH_GRPC) + set(ARROW_PROTOBUF_REQUIRED_VERSION "3.6.0") + else() + set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") + endif() + resolve_dependency_with_version(Protobuf ${ARROW_PROTOBUF_REQUIRED_VERSION}) if(ARROW_PROTOBUF_USE_SHARED AND MSVC) add_definitions(-DPROTOBUF_USE_DLLS) @@ -2041,22 +2054,14 @@ endmacro() if(ARROW_WITH_GRPC) if(c-ares_SOURCE STREQUAL "AUTO") - find_package(c-ares QUIET) + find_package(c-ares QUIET CONFIG) if(NOT c-ares_FOUND) - # Fedora doesn't package the CMake config - find_package(c-aresAlt) - endif() - if(NOT c-ares_FOUND AND NOT c-aresAlt_FOUND) build_cares() endif() elseif(c-ares_SOURCE STREQUAL "BUNDLED") build_cares() elseif(c-ares_SOURCE STREQUAL "SYSTEM") - find_package(c-ares QUIET) - if(NOT c-ares_FOUND) - # Fedora doesn't package the CMake config - find_package(c-aresAlt REQUIRED) - endif() + find_package(c-ares REQUIRED CONFIG) endif() # TODO: Don't use global includes but rather target_include_directories @@ -2144,6 +2149,9 @@ macro(build_grpc) -DCMAKE_INSTALL_LIBDIR=lib "-DProtobuf_PROTOC_LIBRARY=${GRPC_Protobuf_PROTOC_LIBRARY}" -DBUILD_SHARED_LIBS=OFF) + if(OPENSSL_ROOT_DIR) + list(APPEND GRPC_CMAKE_ARGS -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR}) + endif() # XXX the gRPC git checkout is huge and takes a long time # Ideally, we should be able to use the tarballs, but they don't contain @@ -2286,7 +2294,7 @@ macro(build_orc) endif() if("${COMPILER_VERSION}" VERSION_GREATER "4.0") set(ORC_CMAKE_CXX_FLAGS " -Wno-zero-as-null-pointer-constant \ --Wno-inconsistent-missing-destructor-override ") +-Wno-inconsistent-missing-destructor-override -Wno-error=undef ") endif() endif() diff --git a/cpp/examples/arrow/row-wise-conversion-example.cc b/cpp/examples/arrow/row-wise-conversion-example.cc index db8c28753db..c6e45d0d41e 100644 --- a/cpp/examples/arrow/row-wise-conversion-example.cc +++ b/cpp/examples/arrow/row-wise-conversion-example.cc @@ -139,11 +139,11 @@ arrow::Status ColumnarTableToVector(const std::shared_ptr& table, // border would be inside a byte. 
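// For reference, a minimal sketch of the API change applied in this example,
// with illustrative names: Table::column(i) now returns a ChunkedArray
// directly, so the intermediate Column wrapper and its ->data() accessor
// disappear.
//
//   std::shared_ptr<arrow::Table> table = ...;
//   // before: auto chunk = table->column(0)->data()->chunk(0);
//   auto chunk = table->column(0)->chunk(0);  // after: ChunkedArray::chunk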
auto ids = - std::static_pointer_cast(table->column(0)->data()->chunk(0)); + std::static_pointer_cast(table->column(0)->chunk(0)); auto costs = - std::static_pointer_cast(table->column(1)->data()->chunk(0)); + std::static_pointer_cast(table->column(1)->chunk(0)); auto cost_components = - std::static_pointer_cast(table->column(2)->data()->chunk(0)); + std::static_pointer_cast(table->column(2)->chunk(0)); auto cost_components_values = std::static_pointer_cast(cost_components->values()); // To enable zero-copy slices, the native values pointer might need to account diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 68ab60c31b3..10f067e187b 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -370,7 +370,6 @@ add_arrow_test(tensor-test) add_arrow_test(sparse_tensor-test) add_arrow_benchmark(builder-benchmark) -add_arrow_benchmark(column-benchmark) add_subdirectory(array) add_subdirectory(csv) diff --git a/cpp/src/arrow/array-binary-test.cc b/cpp/src/arrow/array-binary-test.cc index cb8d6d53064..71fb81ebb07 100644 --- a/cpp/src/arrow/array-binary-test.cc +++ b/cpp/src/arrow/array-binary-test.cc @@ -40,6 +40,9 @@ namespace arrow { using internal::checked_cast; +using StringTypes = + ::testing::Types; + // ---------------------------------------------------------------------- // String / Binary tests @@ -67,8 +70,14 @@ void CheckStringArray(const ArrayType& array, const std::vector& st } } +template class TestStringArray : public ::testing::Test { public: + using TypeClass = T; + using offset_type = typename TypeClass::offset_type; + using ArrayType = typename TypeTraits::ArrayType; + using BuilderType = typename TypeTraits::BuilderType; + void SetUp() { chars_ = {'a', 'b', 'b', 'c', 'c', 'c'}; offsets_ = {0, 1, 1, 1, 3, 6}; @@ -85,268 +94,132 @@ class TestStringArray : public ::testing::Test { ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_)); null_count_ = CountNulls(valid_bytes_); - strings_ = std::make_shared(length_, offsets_buf_, value_buf_, - null_bitmap_, null_count_); - } - - protected: - std::vector offsets_; - std::vector chars_; - std::vector valid_bytes_; - - std::vector expected_; - - std::shared_ptr value_buf_; - std::shared_ptr offsets_buf_; - std::shared_ptr null_bitmap_; - - int64_t null_count_; - int64_t length_; - - std::shared_ptr strings_; -}; - -TEST_F(TestStringArray, TestArrayBasics) { - ASSERT_EQ(length_, strings_->length()); - ASSERT_EQ(1, strings_->null_count()); - ASSERT_OK(ValidateArray(*strings_)); -} - -TEST_F(TestStringArray, TestType) { - std::shared_ptr type = strings_->type(); - - ASSERT_EQ(Type::STRING, type->id()); - ASSERT_EQ(Type::STRING, strings_->type_id()); -} - -TEST_F(TestStringArray, TestListFunctions) { - int pos = 0; - for (size_t i = 0; i < expected_.size(); ++i) { - ASSERT_EQ(pos, strings_->value_offset(i)); - ASSERT_EQ(static_cast(expected_[i].size()), strings_->value_length(i)); - pos += static_cast(expected_[i].size()); - } -} - -TEST_F(TestStringArray, TestDestructor) { - auto arr = std::make_shared(length_, offsets_buf_, value_buf_, + strings_ = std::make_shared(length_, offsets_buf_, value_buf_, null_bitmap_, null_count_); -} + } -TEST_F(TestStringArray, TestGetString) { - for (size_t i = 0; i < expected_.size(); ++i) { - if (valid_bytes_[i] == 0) { - ASSERT_TRUE(strings_->IsNull(i)); + void _TestArrayBasics() { + ASSERT_EQ(length_, strings_->length()); + ASSERT_EQ(1, strings_->null_count()); + ASSERT_OK(ValidateArray(*strings_)); + 
TestInitialized(*strings_); + AssertZeroPadded(*strings_); + } + + void _TestType() { + std::shared_ptr type = this->strings_->type(); + + if (std::is_same::value) { + ASSERT_EQ(Type::STRING, type->id()); + ASSERT_EQ(Type::STRING, this->strings_->type_id()); + } else if (std::is_same::value) { + ASSERT_EQ(Type::LARGE_STRING, type->id()); + ASSERT_EQ(Type::LARGE_STRING, this->strings_->type_id()); + } else if (std::is_same::value) { + ASSERT_EQ(Type::BINARY, type->id()); + ASSERT_EQ(Type::BINARY, this->strings_->type_id()); + } else if (std::is_same::value) { + ASSERT_EQ(Type::LARGE_BINARY, type->id()); + ASSERT_EQ(Type::LARGE_BINARY, this->strings_->type_id()); } else { - ASSERT_EQ(expected_[i], strings_->GetString(i)); + FAIL(); } } -} - -TEST_F(TestStringArray, TestEmptyStringComparison) { - offsets_ = {0, 0, 0, 0, 0, 0}; - offsets_buf_ = Buffer::Wrap(offsets_); - length_ = static_cast(offsets_.size() - 1); - - auto strings_a = std::make_shared(length_, offsets_buf_, nullptr, - null_bitmap_, null_count_); - auto strings_b = std::make_shared(length_, offsets_buf_, nullptr, - null_bitmap_, null_count_); - ASSERT_TRUE(strings_a->Equals(strings_b)); -} - -TEST_F(TestStringArray, CompareNullByteSlots) { - StringBuilder builder; - StringBuilder builder2; - StringBuilder builder3; - - ASSERT_OK(builder.Append("foo")); - ASSERT_OK(builder2.Append("foo")); - ASSERT_OK(builder3.Append("foo")); - - ASSERT_OK(builder.Append("bar")); - ASSERT_OK(builder2.AppendNull()); - - // same length, but different - ASSERT_OK(builder3.Append("xyz")); - - ASSERT_OK(builder.Append("baz")); - ASSERT_OK(builder2.Append("baz")); - ASSERT_OK(builder3.Append("baz")); - - std::shared_ptr array, array2, array3; - FinishAndCheckPadding(&builder, &array); - ASSERT_OK(builder2.Finish(&array2)); - ASSERT_OK(builder3.Finish(&array3)); - - const auto& a1 = checked_cast(*array); - const auto& a2 = checked_cast(*array2); - const auto& a3 = checked_cast(*array3); - - // The validity bitmaps are the same, the data is different, but the unequal - // portion is masked out - StringArray equal_array(3, a1.value_offsets(), a1.value_data(), a2.null_bitmap(), 1); - StringArray equal_array2(3, a3.value_offsets(), a3.value_data(), a2.null_bitmap(), 1); - ASSERT_TRUE(equal_array.Equals(equal_array2)); - ASSERT_TRUE(a2.RangeEquals(equal_array2, 0, 3, 0)); - - ASSERT_TRUE(equal_array.Array::Slice(1)->Equals(equal_array2.Array::Slice(1))); - ASSERT_TRUE( - equal_array.Array::Slice(1)->RangeEquals(0, 2, 0, equal_array2.Array::Slice(1))); -} - -TEST_F(TestStringArray, TestSliceGetString) { - StringBuilder builder; - - ASSERT_OK(builder.Append("a")); - ASSERT_OK(builder.Append("b")); - ASSERT_OK(builder.Append("c")); - - std::shared_ptr array; - ASSERT_OK(builder.Finish(&array)); - auto s = array->Slice(1, 10); - auto arr = std::dynamic_pointer_cast(s); - ASSERT_EQ(arr->GetString(0), "b"); -} - -// ---------------------------------------------------------------------- -// String builder tests - -class TestStringBuilder : public TestBuilder { - public: - void SetUp() { - TestBuilder::SetUp(); - builder_.reset(new StringBuilder(pool_)); + void _TestListFunctions() { + int64_t pos = 0; + for (size_t i = 0; i < expected_.size(); ++i) { + ASSERT_EQ(pos, strings_->value_offset(i)); + ASSERT_EQ(expected_[i].size(), strings_->value_length(i)); + pos += expected_[i].size(); + } } - void Done() { - std::shared_ptr out; - FinishAndCheckPadding(builder_.get(), &out); - - result_ = std::dynamic_pointer_cast(out); - ASSERT_OK(ValidateArray(*result_)); + void 
_TestDestructor() { + auto arr = std::make_shared(length_, offsets_buf_, value_buf_, + null_bitmap_, null_count_); } - protected: - std::unique_ptr builder_; - std::shared_ptr result_; -}; - -TEST_F(TestStringBuilder, TestScalarAppend) { - std::vector strings = {"", "bb", "a", "", "ccc"}; - std::vector is_valid = {1, 1, 1, 0, 1}; - - int N = static_cast(strings.size()); - int reps = 1000; - - for (int j = 0; j < reps; ++j) { - for (int i = 0; i < N; ++i) { - if (!is_valid[i]) { - ASSERT_OK(builder_->AppendNull()); + void _TestGetString() { + for (size_t i = 0; i < expected_.size(); ++i) { + if (valid_bytes_[i] == 0) { + ASSERT_TRUE(strings_->IsNull(i)); } else { - ASSERT_OK(builder_->Append(strings[i])); + ASSERT_FALSE(strings_->IsNull(i)); + ASSERT_EQ(expected_[i], strings_->GetString(i)); } } } - Done(); - - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * 6, result_->value_data()->size()); - CheckStringArray(*result_, strings, is_valid, reps); -} - -TEST_F(TestStringBuilder, TestAppendVector) { - std::vector strings = {"", "bb", "a", "", "ccc"}; - std::vector valid_bytes = {1, 1, 1, 0, 1}; - - int N = static_cast(strings.size()); - int reps = 1000; - - for (int j = 0; j < reps; ++j) { - ASSERT_OK(builder_->AppendValues(strings, valid_bytes.data())); - } - Done(); - - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * 6, result_->value_data()->size()); - - CheckStringArray(*result_, strings, valid_bytes, reps); -} - -TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) { - const char* strings[] = {nullptr, "aaa", nullptr, "ignored", ""}; - std::vector valid_bytes = {1, 1, 1, 0, 1}; - - int N = static_cast(sizeof(strings) / sizeof(strings[0])); - int reps = 1000; + void _TestEmptyStringComparison() { + offsets_ = {0, 0, 0, 0, 0, 0}; + offsets_buf_ = Buffer::Wrap(offsets_); + length_ = static_cast(offsets_.size() - 1); - for (int j = 0; j < reps; ++j) { - ASSERT_OK(builder_->AppendValues(strings, N, valid_bytes.data())); + auto strings_a = std::make_shared(length_, offsets_buf_, nullptr, + null_bitmap_, null_count_); + auto strings_b = std::make_shared(length_, offsets_buf_, nullptr, + null_bitmap_, null_count_); + ASSERT_TRUE(strings_a->Equals(strings_b)); } - Done(); - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps * 3, result_->null_count()); - ASSERT_EQ(reps * 3, result_->value_data()->size()); + void _TestCompareNullByteSlots() { + BuilderType builder; + BuilderType builder2; + BuilderType builder3; - CheckStringArray(*result_, {"", "aaa", "", "", ""}, {0, 1, 0, 0, 1}, reps); -} + ASSERT_OK(builder.Append("foo")); + ASSERT_OK(builder2.Append("foo")); + ASSERT_OK(builder3.Append("foo")); -TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) { - const char* strings[] = {"", "bb", "a", nullptr, "ccc"}; + ASSERT_OK(builder.Append("bar")); + ASSERT_OK(builder2.AppendNull()); - int N = static_cast(sizeof(strings) / sizeof(strings[0])); - int reps = 1000; + // same length, but different + ASSERT_OK(builder3.Append("xyz")); - for (int j = 0; j < reps; ++j) { - ASSERT_OK(builder_->AppendValues(strings, N)); - } - Done(); + ASSERT_OK(builder.Append("baz")); + ASSERT_OK(builder2.Append("baz")); + ASSERT_OK(builder3.Append("baz")); - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * 6, result_->value_data()->size()); + std::shared_ptr array, array2, array3; + FinishAndCheckPadding(&builder, &array); + 
ASSERT_OK(builder2.Finish(&array2)); + ASSERT_OK(builder3.Finish(&array3)); - CheckStringArray(*result_, {"", "bb", "a", "", "ccc"}, {1, 1, 1, 0, 1}, reps); -} + const auto& a1 = checked_cast(*array); + const auto& a2 = checked_cast(*array2); + const auto& a3 = checked_cast(*array3); -TEST_F(TestStringBuilder, TestZeroLength) { - // All buffers are null - Done(); -} + // The validity bitmaps are the same, the data is different, but the unequal + // portion is masked out + ArrayType equal_array(3, a1.value_offsets(), a1.value_data(), a2.null_bitmap(), 1); + ArrayType equal_array2(3, a3.value_offsets(), a3.value_data(), a2.null_bitmap(), 1); -// Binary container type -// TODO(emkornfield) there should be some way to refactor these to avoid code duplicating -// with String -class TestBinaryArray : public ::testing::Test { - public: - void SetUp() { - chars_ = {'a', 'b', 'b', 'c', 'c', 'c'}; - offsets_ = {0, 1, 1, 1, 3, 6}; - valid_bytes_ = {1, 1, 0, 1, 1}; - expected_ = {"a", "", "", "bb", "ccc"}; + ASSERT_TRUE(equal_array.Equals(equal_array2)); + ASSERT_TRUE(a2.RangeEquals(equal_array2, 0, 3, 0)); - MakeArray(); + ASSERT_TRUE(equal_array.Array::Slice(1)->Equals(equal_array2.Array::Slice(1))); + ASSERT_TRUE( + equal_array.Array::Slice(1)->RangeEquals(0, 2, 0, equal_array2.Array::Slice(1))); } - void MakeArray() { - length_ = static_cast(offsets_.size() - 1); - value_buf_ = Buffer::Wrap(chars_); - offsets_buf_ = Buffer::Wrap(offsets_); + void _TestSliceGetString() { + BuilderType builder; - ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, default_memory_pool(), &null_bitmap_)); - null_count_ = CountNulls(valid_bytes_); + ASSERT_OK(builder.Append("a")); + ASSERT_OK(builder.Append("b")); + ASSERT_OK(builder.Append("c")); - strings_ = std::make_shared(length_, offsets_buf_, value_buf_, - null_bitmap_, null_count_); + std::shared_ptr array; + ASSERT_OK(builder.Finish(&array)); + auto s = array->Slice(1, 10); + auto arr = std::dynamic_pointer_cast(s); + ASSERT_EQ(arr->GetString(0), "b"); } protected: - std::vector offsets_; + std::vector offsets_; std::vector chars_; std::vector valid_bytes_; @@ -359,300 +232,161 @@ class TestBinaryArray : public ::testing::Test { int64_t null_count_; int64_t length_; - std::shared_ptr strings_; + std::shared_ptr strings_; }; -TEST_F(TestBinaryArray, TestArrayBasics) { - ASSERT_EQ(length_, strings_->length()); - ASSERT_EQ(1, strings_->null_count()); - ASSERT_OK(ValidateArray(*strings_)); -} +TYPED_TEST_CASE(TestStringArray, StringTypes); -TEST_F(TestBinaryArray, TestType) { - std::shared_ptr type = strings_->type(); +TYPED_TEST(TestStringArray, TestArrayBasics) { this->_TestArrayBasics(); } - ASSERT_EQ(Type::BINARY, type->id()); - ASSERT_EQ(Type::BINARY, strings_->type_id()); -} +TYPED_TEST(TestStringArray, TestType) { this->_TestType(); } -TEST_F(TestBinaryArray, TestListFunctions) { - size_t pos = 0; - for (size_t i = 0; i < expected_.size(); ++i) { - ASSERT_EQ(pos, strings_->value_offset(i)); - ASSERT_EQ(static_cast(expected_[i].size()), strings_->value_length(i)); - pos += expected_[i].size(); - } -} +TYPED_TEST(TestStringArray, TestListFunctions) { this->_TestListFunctions(); } -TEST_F(TestBinaryArray, TestDestructor) { - auto arr = std::make_shared(length_, offsets_buf_, value_buf_, - null_bitmap_, null_count_); -} +TYPED_TEST(TestStringArray, TestDestructor) { this->_TestDestructor(); } -TEST_F(TestBinaryArray, TestGetValue) { - for (size_t i = 0; i < expected_.size(); ++i) { - if (valid_bytes_[i] == 0) { - ASSERT_TRUE(strings_->IsNull(i)); - } else { - 
ASSERT_FALSE(strings_->IsNull(i)); - ASSERT_EQ(strings_->GetString(i), expected_[i]); - } - } -} +TYPED_TEST(TestStringArray, TestGetString) { this->_TestGetString(); } -TEST_F(TestBinaryArray, TestNullValuesInitialized) { - for (size_t i = 0; i < expected_.size(); ++i) { - if (valid_bytes_[i] == 0) { - ASSERT_TRUE(strings_->IsNull(i)); - } else { - ASSERT_FALSE(strings_->IsNull(i)); - ASSERT_EQ(strings_->GetString(i), expected_[i]); - } - } - TestInitialized(*strings_); +TYPED_TEST(TestStringArray, TestEmptyStringComparison) { + this->_TestEmptyStringComparison(); } -TEST_F(TestBinaryArray, TestPaddingZeroed) { AssertZeroPadded(*strings_); } +TYPED_TEST(TestStringArray, CompareNullByteSlots) { this->_TestCompareNullByteSlots(); } -TEST_F(TestBinaryArray, TestGetString) { - for (size_t i = 0; i < expected_.size(); ++i) { - if (valid_bytes_[i] == 0) { - ASSERT_TRUE(strings_->IsNull(i)); - } else { - std::string val = strings_->GetString(i); - ASSERT_EQ(0, std::memcmp(expected_[i].data(), val.c_str(), val.size())); - } - } -} +TYPED_TEST(TestStringArray, TestSliceGetString) { this->_TestSliceGetString(); } -TEST_F(TestBinaryArray, TestEqualsEmptyStrings) { - BinaryBuilder builder; - - std::string empty_string(""); - for (int i = 0; i < 5; ++i) { - ASSERT_OK(builder.Append(empty_string)); - } - - std::shared_ptr left_arr; - FinishAndCheckPadding(&builder, &left_arr); - - const BinaryArray& left = checked_cast(*left_arr); - std::shared_ptr right = - std::make_shared(left.length(), left.value_offsets(), nullptr, - left.null_bitmap(), left.null_count()); - - ASSERT_TRUE(left.Equals(right)); - ASSERT_TRUE(left.RangeEquals(0, left.length(), 0, right)); -} +// ---------------------------------------------------------------------- +// String builder tests -class TestBinaryBuilder : public TestBuilder { +template +class TestStringBuilder : public TestBuilder { public: + using TypeClass = T; + using offset_type = typename TypeClass::offset_type; + using ArrayType = typename TypeTraits::ArrayType; + using BuilderType = typename TypeTraits::BuilderType; + void SetUp() { TestBuilder::SetUp(); - builder_.reset(new BinaryBuilder(pool_)); + builder_.reset(new BuilderType(pool_)); } void Done() { std::shared_ptr out; FinishAndCheckPadding(builder_.get(), &out); - result_ = std::dynamic_pointer_cast(out); + result_ = std::dynamic_pointer_cast(out); ASSERT_OK(ValidateArray(*result_)); } - protected: - std::unique_ptr builder_; - std::shared_ptr result_; -}; - -TEST_F(TestBinaryBuilder, TestScalarAppend) { - std::vector strings = {"", "bb", "a", "", "ccc"}; - std::vector is_valid = {1, 1, 1, 0, 1}; + void _TestScalarAppend() { + std::vector strings = {"", "bb", "a", "", "ccc"}; + std::vector is_valid = {1, 1, 1, 0, 1}; - int N = static_cast(strings.size()); - int reps = 10; + int N = static_cast(strings.size()); + int reps = 1000; - for (int j = 0; j < reps; ++j) { - for (int i = 0; i < N; ++i) { - if (!is_valid[i]) { - ASSERT_OK(builder_->AppendNull()); - } else { - ASSERT_OK(builder_->Append(strings[i])); + for (int j = 0; j < reps; ++j) { + for (int i = 0; i < N; ++i) { + if (!is_valid[i]) { + ASSERT_OK(builder_->AppendNull()); + } else { + ASSERT_OK(builder_->Append(strings[i])); + } } } - } - Done(); - ASSERT_OK(ValidateArray(*result_)); - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * 6, result_->value_data()->size()); - - CheckStringArray(*result_, strings, is_valid, reps); -} - -TEST_F(TestBinaryBuilder, TestAppendNulls) { - 
ASSERT_OK(builder_->Append("bow")); - ASSERT_OK(builder_->AppendNulls(3)); - ASSERT_OK(builder_->Append("arrow")); - Done(); - ASSERT_OK(ValidateArray(*result_)); - - ASSERT_EQ(5, result_->length()); - ASSERT_EQ(3, result_->null_count()); - ASSERT_EQ(8, result_->value_data()->size()); - - CheckStringArray(*result_, {"bow", "", "", "", "arrow"}, {1, 0, 0, 0, 1}); -} + Done(); -TEST_F(TestBinaryBuilder, TestScalarAppendUnsafe) { - std::vector strings = {"", "bb", "a", "", "ccc"}; - std::vector is_valid = {1, 1, 1, 0, 1}; + ASSERT_EQ(reps * N, result_->length()); + ASSERT_EQ(reps, result_->null_count()); + ASSERT_EQ(reps * 6, result_->value_data()->size()); - int N = static_cast(strings.size()); - int reps = 13; - int total_length = 0; - for (auto&& s : strings) total_length += static_cast(s.size()); - - ASSERT_OK(builder_->Reserve(N * reps)); - ASSERT_OK(builder_->ReserveData(total_length * reps)); - - for (int j = 0; j < reps; ++j) { - for (int i = 0; i < N; ++i) { - if (!is_valid[i]) { - builder_->UnsafeAppendNull(); - } else { - builder_->UnsafeAppend(strings[i]); - } - } + CheckStringArray(*result_, strings, is_valid, reps); } - ASSERT_EQ(builder_->value_data_length(), total_length * reps); - Done(); - ASSERT_OK(ValidateArray(*result_)); - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(reps, result_->null_count()); - ASSERT_EQ(reps * total_length, result_->value_data()->size()); - - CheckStringArray(*result_, strings, is_valid, reps); -} - -TEST_F(TestBinaryBuilder, TestCapacityReserve) { - std::vector strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc", - "dddddddddd"}; - int N = static_cast(strings.size()); - int reps = 15; - int64_t length = 0; - int64_t capacity = 1000; - int64_t expected_capacity = BitUtil::RoundUpToMultipleOf64(capacity); - - ASSERT_OK(builder_->ReserveData(capacity)); - ASSERT_EQ(length, builder_->value_data_length()); - ASSERT_EQ(expected_capacity, builder_->value_data_capacity()); + void _TestVectorAppend() { + std::vector strings = {"", "bb", "a", "", "ccc"}; + std::vector valid_bytes = {1, 1, 1, 0, 1}; - for (int j = 0; j < reps; ++j) { - for (int i = 0; i < N; ++i) { - ASSERT_OK(builder_->Append(strings[i])); - length += static_cast(strings[i].size()); + int N = static_cast(strings.size()); + int reps = 1000; - ASSERT_EQ(length, builder_->value_data_length()); - ASSERT_EQ(expected_capacity, builder_->value_data_capacity()); + for (int j = 0; j < reps; ++j) { + ASSERT_OK(builder_->AppendValues(strings, valid_bytes.data())); } - } - - int extra_capacity = 500; - expected_capacity = BitUtil::RoundUpToMultipleOf64(length + extra_capacity); + Done(); - ASSERT_OK(builder_->ReserveData(extra_capacity)); + ASSERT_EQ(reps * N, result_->length()); + ASSERT_EQ(reps, result_->null_count()); + ASSERT_EQ(reps * 6, result_->value_data()->size()); - ASSERT_EQ(length, builder_->value_data_length()); - int64_t actual_capacity = builder_->value_data_capacity(); - ASSERT_GE(actual_capacity, expected_capacity); - ASSERT_EQ(actual_capacity & 63, 0); - - Done(); - - ASSERT_EQ(reps * N, result_->length()); - ASSERT_EQ(0, result_->null_count()); - ASSERT_EQ(reps * 40, result_->value_data()->size()); + CheckStringArray(*result_, strings, valid_bytes, reps); + } - // Capacity is shrunk after `Finish` - ASSERT_EQ(640, result_->value_data()->capacity()); -} + void _TestAppendCStringsWithValidBytes() { + const char* strings[] = {nullptr, "aaa", nullptr, "ignored", ""}; + std::vector valid_bytes = {1, 1, 1, 0, 1}; -TEST_F(TestBinaryBuilder, TestZeroLength) { - // All buffers are null 
-  Done();
-}
+    int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
+    int reps = 1000;
 
-// ----------------------------------------------------------------------
-// Slice tests
+    for (int j = 0; j < reps; ++j) {
+      ASSERT_OK(builder_->AppendValues(strings, N, valid_bytes.data()));
+    }
+    Done();
 
-template <typename TYPE>
-void CheckSliceEquality() {
-  using Traits = TypeTraits<TYPE>;
-  using BuilderType = typename Traits::BuilderType;
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps * 3, result_->null_count());
+    ASSERT_EQ(reps * 3, result_->value_data()->size());
 
-  BuilderType builder;
+    CheckStringArray(*result_, {"", "aaa", "", "", ""}, {0, 1, 0, 0, 1}, reps);
+  }
 
-  std::vector<std::string> strings = {"foo", "", "bar", "baz", "qux", ""};
-  std::vector<uint8_t> is_null = {0, 1, 0, 1, 0, 0};
+  void _TestAppendCStringsWithoutValidBytes() {
+    const char* strings[] = {"", "bb", "a", nullptr, "ccc"};
 
-  int N = static_cast<int>(strings.size());
-  int reps = 10;
+    int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
+    int reps = 1000;
 
-  for (int j = 0; j < reps; ++j) {
-    for (int i = 0; i < N; ++i) {
-      if (is_null[i]) {
-        ASSERT_OK(builder.AppendNull());
-      } else {
-        ASSERT_OK(builder.Append(strings[i]));
-      }
+    for (int j = 0; j < reps; ++j) {
+      ASSERT_OK(builder_->AppendValues(strings, N));
     }
-  }
+    Done();
 
-  std::shared_ptr<Array> array;
-  FinishAndCheckPadding(&builder, &array);
+    ASSERT_EQ(reps * N, result_->length());
+    ASSERT_EQ(reps, result_->null_count());
+    ASSERT_EQ(reps * 6, result_->value_data()->size());
 
-  std::shared_ptr<Array> slice, slice2;
-
-  slice = array->Slice(5);
-  slice2 = array->Slice(5);
-  ASSERT_EQ(N * reps - 5, slice->length());
-
-  ASSERT_TRUE(slice->Equals(slice2));
-  ASSERT_TRUE(array->RangeEquals(5, slice->length(), 0, slice));
+    CheckStringArray(*result_, {"", "bb", "a", "", "ccc"}, {1, 1, 1, 0, 1}, reps);
+  }
 
-  // Chained slices
-  slice2 = array->Slice(2)->Slice(3);
-  ASSERT_TRUE(slice->Equals(slice2));
+  void _TestZeroLength() {
+    // All buffers are null
+    Done();
+    ASSERT_EQ(result_->length(), 0);
+    ASSERT_EQ(result_->null_count(), 0);
+  }
 
-  slice = array->Slice(5, 20);
-  slice2 = array->Slice(5, 20);
-  ASSERT_EQ(20, slice->length());
+ protected:
+  std::unique_ptr<BuilderType> builder_;
+  std::shared_ptr<ArrayType> result_;
+};
 
-  ASSERT_TRUE(slice->Equals(slice2));
-  ASSERT_TRUE(array->RangeEquals(5, 25, 0, slice));
+TYPED_TEST_CASE(TestStringBuilder, StringTypes);
 
-  ASSERT_OK(builder.Append("a"));
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder.Append(""));
-  }
-  FinishAndCheckPadding(&builder, &array);
-  slice = array->Slice(1);
+TYPED_TEST(TestStringBuilder, TestScalarAppend) { this->_TestScalarAppend(); }
 
-  for (int j = 0; j < reps; ++j) {
-    ASSERT_OK(builder.Append(""));
-  }
-  FinishAndCheckPadding(&builder, &array);
+TYPED_TEST(TestStringBuilder, TestVectorAppend) { this->_TestVectorAppend(); }
 
-  AssertArraysEqual(*slice, *array);
+TYPED_TEST(TestStringBuilder, TestAppendCStringsWithValidBytes) {
+  this->_TestAppendCStringsWithValidBytes();
 }
 
-TEST_F(TestBinaryArray, TestSliceEquality) { CheckSliceEquality<BinaryType>(); }
-
-TEST_F(TestStringArray, TestSliceEquality) { CheckSliceEquality<StringType>(); }
+TYPED_TEST(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
+  this->_TestAppendCStringsWithoutValidBytes();
+}
 
-TEST_F(TestBinaryArray, LengthZeroCtor) { BinaryArray array(0, nullptr, nullptr); }
+TYPED_TEST(TestStringBuilder, TestZeroLength) { this->_TestZeroLength(); }
 
 // ----------------------------------------------------------------------
 // ChunkedBinaryBuilder tests
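
For reference, the AppendValues overload exercised by these typed tests can be driven the same way from user code. The following is a minimal sketch against the public builder API, not part of the patch itself; MakeStringArrayExample is an illustrative name:

#include <memory>
#include <string>
#include <vector>

#include "arrow/api.h"

// Build a StringArray with an explicit validity map, mirroring what
// _TestVectorAppend drives through the typed fixture above.
arrow::Status MakeStringArrayExample(std::shared_ptr<arrow::Array>* out) {
  arrow::StringBuilder builder;
  std::vector<std::string> values = {"", "bb", "a", "", "ccc"};
  std::vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};  // fourth slot is null
  ARROW_RETURN_NOT_OK(builder.AppendValues(values, valid_bytes.data()));
  return builder.Finish(out);  // 5 elements, 1 null, 6 value bytes
}
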
diff --git a/cpp/src/arrow/array-dict-test.cc b/cpp/src/arrow/array-dict-test.cc
index 16d8aac4d59..ed37df30264 100644
--- a/cpp/src/arrow/array-dict-test.cc
+++ b/cpp/src/arrow/array-dict-test.cc
@@ -948,4 +948,49 @@ TEST(TestDictionary, TransposeNulls) {
   AssertArraysEqual(*expected, *out);
 }
 
+TEST(TestDictionary, DISABLED_ListOfDictionary) {
+  std::unique_ptr<ArrayBuilder> root_builder;
+  ASSERT_OK(MakeBuilder(default_memory_pool(), list(dictionary(int8(), utf8())),
+                        &root_builder));
+  auto list_builder = checked_cast<ListBuilder*>(root_builder.get());
+  auto dict_builder =
+      checked_cast<DictionaryBuilder<StringType>*>(list_builder->value_builder());
+
+  ASSERT_OK(list_builder->Append());
+  std::vector<std::string> expected;
+  for (char a : "abc") {
+    for (char d : "def") {
+      for (char g : "ghi") {
+        for (char j : "jkl") {
+          for (char m : "mno") {
+            for (char p : "pqr") {
+              if ((static_cast<int>(a) + d + g + j + m + p) % 16 == 0) {
+                ASSERT_OK(list_builder->Append());
+              }
+              // 3**6 distinct strings; too large for int8
+              char str[6] = {a, d, g, j, m, p};
+              ASSERT_OK(dict_builder->Append(str));
+              expected.push_back(str);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  std::shared_ptr<Array> expected_dict;
+  ArrayFromVector<StringType, std::string>(expected, &expected_dict);
+
+  std::shared_ptr<Array> array;
+  ASSERT_OK(root_builder->Finish(&array));
+  ASSERT_OK(ValidateArray(*array));
+
+  auto expected_type = list(dictionary(int16(), utf8()));
+  ASSERT_EQ(array->type()->ToString(), expected_type->ToString());
+
+  auto list_array = checked_cast<const ListArray*>(array.get());
+  auto actual_dict =
+      checked_cast<const DictionaryArray&>(*list_array->values()).dictionary();
+  ASSERT_ARRAYS_EQUAL(*expected_dict, *actual_dict);
+}
+
 }  // namespace arrow
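
The disabled test above relies on the MakeBuilder factory plus checked_cast to reach a nested value builder. A minimal sketch of that access pattern for a plain list<utf8> builder follows; MakeListOfStrings is an illustrative name, and the cast targets simply follow the declared builder hierarchy:

#include <memory>

#include "arrow/api.h"
#include "arrow/util/checked_cast.h"

arrow::Status MakeListOfStrings(std::shared_ptr<arrow::Array>* out) {
  std::unique_ptr<arrow::ArrayBuilder> root;
  ARROW_RETURN_NOT_OK(arrow::MakeBuilder(arrow::default_memory_pool(),
                                         arrow::list(arrow::utf8()), &root));
  // MakeBuilder returns the root as a generic ArrayBuilder; downcast to
  // reach the child builders.
  auto* list_builder = arrow::internal::checked_cast<arrow::ListBuilder*>(root.get());
  auto* str_builder = arrow::internal::checked_cast<arrow::StringBuilder*>(
      list_builder->value_builder());
  ARROW_RETURN_NOT_OK(list_builder->Append());  // open one list slot
  ARROW_RETURN_NOT_OK(str_builder->Append("abc"));
  ARROW_RETURN_NOT_OK(str_builder->Append("def"));
  return root->Finish(out);  // [["abc", "def"]]
}
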
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 2005a0db562..df8b85262ff 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -1300,7 +1300,6 @@ TEST_F(TestFWBinaryArray, ZeroSize) {
 
   ASSERT_OK(builder.Append(""));
   ASSERT_OK(builder.Append(std::string()));
-  ASSERT_OK(builder.Append(static_cast<const uint8_t*>(nullptr)));
   ASSERT_OK(builder.AppendNull());
   ASSERT_OK(builder.AppendNull());
   ASSERT_OK(builder.AppendNull());
@@ -1314,7 +1313,7 @@ TEST_F(TestFWBinaryArray, ZeroSize) {
   ASSERT_EQ(fw_array.values()->size(), 0);
   ASSERT_EQ(0, fw_array.byte_width());
 
-  ASSERT_EQ(6, array->length());
+  ASSERT_EQ(5, array->length());
   ASSERT_EQ(3, array->null_count());
 }
 
diff --git a/cpp/src/arrow/array-union-test.cc b/cpp/src/arrow/array-union-test.cc
index 86cbeae6d78..62a4e15eb0c 100644
--- a/cpp/src/arrow/array-union-test.cc
+++ b/cpp/src/arrow/array-union-test.cc
@@ -188,4 +188,219 @@ TEST_F(TestUnionArrayFactories, TestMakeSparse) {
                type_codes);
 }
 
+template <typename B>
+class UnionBuilderTest : public ::testing::Test {
+ public:
+  uint8_t I8 = 8, STR = 13, DBL = 7;
+
+  virtual void AppendInt(int8_t i) {
+    expected_types_vector.push_back(I8);
+    ASSERT_OK(union_builder->Append(I8));
+    ASSERT_OK(i8_builder->Append(i));
+  }
+
+  virtual void AppendString(const std::string& str) {
+    expected_types_vector.push_back(STR);
+    ASSERT_OK(union_builder->Append(STR));
+    ASSERT_OK(str_builder->Append(str));
+  }
+
+  virtual void AppendDouble(double dbl) {
+    expected_types_vector.push_back(DBL);
+    ASSERT_OK(union_builder->Append(DBL));
+    ASSERT_OK(dbl_builder->Append(dbl));
+  }
+
+  void AppendBasics() {
+    AppendInt(33);
+    AppendString("abc");
+    AppendDouble(1.0);
+    AppendDouble(-1.0);
+    AppendString("");
+    AppendInt(10);
+    AppendString("def");
+    AppendInt(-10);
+    AppendDouble(0.5);
+    ASSERT_OK(union_builder->Finish(&actual));
+    ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
+  }
+
+  void AppendInferred() {
+    I8 = union_builder->AppendChild(i8_builder, "i8");
+    ASSERT_EQ(I8, 0);
+    AppendInt(33);
+    AppendInt(10);
+
+    STR = union_builder->AppendChild(str_builder, "str");
+    ASSERT_EQ(STR, 1);
+    AppendString("abc");
+    AppendString("");
+    AppendString("def");
+    AppendInt(-10);
+
+    DBL = union_builder->AppendChild(dbl_builder, "dbl");
+    ASSERT_EQ(DBL, 2);
+    AppendDouble(1.0);
+    AppendDouble(-1.0);
+    AppendDouble(0.5);
+    ASSERT_OK(union_builder->Finish(&actual));
+    ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
+
+    ASSERT_EQ(I8, 0);
+    ASSERT_EQ(STR, 1);
+    ASSERT_EQ(DBL, 2);
+  }
+
+  void AppendListOfInferred(std::shared_ptr<Array>* actual) {
+    ListBuilder list_builder(default_memory_pool(), union_builder);
+
+    ASSERT_OK(list_builder.Append());
+    I8 = union_builder->AppendChild(i8_builder, "i8");
+    ASSERT_EQ(I8, 0);
+    AppendInt(10);
+
+    ASSERT_OK(list_builder.Append());
+    STR = union_builder->AppendChild(str_builder, "str");
+    ASSERT_EQ(STR, 1);
+    AppendString("abc");
+    AppendInt(-10);
+
+    ASSERT_OK(list_builder.Append());
+    DBL = union_builder->AppendChild(dbl_builder, "dbl");
+    ASSERT_EQ(DBL, 2);
+    AppendDouble(0.5);
+
+    ASSERT_OK(list_builder.Finish(actual));
+    ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
+
+    ASSERT_EQ(I8, 0);
+    ASSERT_EQ(STR, 1);
+    ASSERT_EQ(DBL, 2);
+  }
+
+  std::vector<uint8_t> expected_types_vector;
+  std::shared_ptr<Array> expected_types;
+  std::shared_ptr<Int8Builder> i8_builder = std::make_shared<Int8Builder>();
+  std::shared_ptr<StringBuilder> str_builder = std::make_shared<StringBuilder>();
+  std::shared_ptr<DoubleBuilder> dbl_builder = std::make_shared<DoubleBuilder>();
+  std::shared_ptr<B> union_builder{new B(default_memory_pool())};
+  std::shared_ptr<UnionArray> actual;
+};
+
+class DenseUnionBuilderTest : public UnionBuilderTest<DenseUnionBuilder> {};
+class SparseUnionBuilderTest : public UnionBuilderTest<SparseUnionBuilder> {
+ public:
+  using Base = UnionBuilderTest<SparseUnionBuilder>;
+
+  void AppendInt(int8_t i) override {
+    Base::AppendInt(i);
+    ASSERT_OK(str_builder->AppendNull());
+    ASSERT_OK(dbl_builder->AppendNull());
+  }
+
+  void AppendString(const std::string& str) override {
+    Base::AppendString(str);
+    ASSERT_OK(i8_builder->AppendNull());
+    ASSERT_OK(dbl_builder->AppendNull());
+  }
+
+  void AppendDouble(double dbl) override {
+    Base::AppendDouble(dbl);
+    ASSERT_OK(i8_builder->AppendNull());
+    ASSERT_OK(str_builder->AppendNull());
+  }
+};
+
+TEST_F(DenseUnionBuilderTest, Basics) {
+  union_builder.reset(new DenseUnionBuilder(
+      default_memory_pool(), {i8_builder, str_builder, dbl_builder},
+      union_({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
+             {I8, STR, DBL}, UnionMode::DENSE)));
+  AppendBasics();
+
+  auto expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]");
+  auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])");
+  auto expected_dbl = ArrayFromJSON(float64(), "[1.0, -1.0, 0.5]");
+
+  auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 1, 1, 2, 2, 2]");
+
+  std::shared_ptr<Array> expected;
+  ASSERT_OK(UnionArray::MakeDense(*expected_types, *expected_offsets,
+                                  {expected_i8, expected_str, expected_dbl},
+                                  {"i8", "str", "dbl"}, {I8, STR, DBL}, &expected));
+
+  ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
+  ASSERT_ARRAYS_EQUAL(*expected, *actual);
+}
+
+TEST_F(DenseUnionBuilderTest, InferredType) {
+  AppendInferred();
+
+  auto expected_i8 = ArrayFromJSON(int8(), "[33, 10, -10]");
+  auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "def"])");
+  auto expected_dbl = ArrayFromJSON(float64(), "[1.0, -1.0, 0.5]");
+
+  auto expected_offsets = ArrayFromJSON(int32(), "[0, 1, 0, 1, 2, 2, 0, 1, 2]");
+
+  std::shared_ptr<Array> expected;
+  ASSERT_OK(UnionArray::MakeDense(*expected_types, *expected_offsets,
+                                  {expected_i8, expected_str, expected_dbl},
+                                  {"i8", "str", "dbl"}, {I8, STR, DBL}, &expected));
+
+  ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
+  ASSERT_ARRAYS_EQUAL(*expected, *actual);
+}
+
+TEST_F(DenseUnionBuilderTest, ListOfInferredType) {
+  std::shared_ptr<Array> actual;
+  AppendListOfInferred(&actual);
+
+  auto expected_type =
+      list(union_({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
+                  {I8, STR, DBL}, UnionMode::DENSE));
+  ASSERT_EQ(expected_type->ToString(), actual->type()->ToString());
+}
+
+TEST_F(SparseUnionBuilderTest, Basics) {
+  union_builder.reset(new SparseUnionBuilder(
+      default_memory_pool(), {i8_builder, str_builder, dbl_builder},
+      union_({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
+             {I8, STR, DBL}, UnionMode::SPARSE)));
+
+  AppendBasics();
+
+  auto expected_i8 =
+      ArrayFromJSON(int8(), "[33, null, null, null, null, 10, null, -10, null]");
+  auto expected_str =
+      ArrayFromJSON(utf8(), R"([null, "abc", null, null, "", null, "def", null, null])");
+  auto expected_dbl =
+      ArrayFromJSON(float64(), "[null, null, 1.0, -1.0, null, null, null, null, 0.5]");
+
+  std::shared_ptr<Array> expected;
+  ASSERT_OK(UnionArray::MakeSparse(*expected_types,
+                                   {expected_i8, expected_str, expected_dbl},
+                                   {"i8", "str", "dbl"}, {I8, STR, DBL}, &expected));
+
+  ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
+  ASSERT_ARRAYS_EQUAL(*expected, *actual);
+}
+
+TEST_F(SparseUnionBuilderTest, InferredType) {
+  AppendInferred();
+
+  auto expected_i8 =
+      ArrayFromJSON(int8(), "[33, 10, null, null, null, -10, null, null, null]");
+  auto expected_str =
+      ArrayFromJSON(utf8(), R"([null, null, "abc", "", "def", null, null, null, null])");
+  auto expected_dbl =
+      ArrayFromJSON(float64(), "[null, null, null, null, null, null, 1.0, -1.0, 0.5]");
+
+  std::shared_ptr<Array> expected;
+  ASSERT_OK(UnionArray::MakeSparse(*expected_types,
+                                   {expected_i8, expected_str, expected_dbl},
+                                   {"i8", "str", "dbl"}, {I8, STR, DBL}, &expected));
+
+  ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
+  ASSERT_ARRAYS_EQUAL(*expected, *actual);
+}
+
 }  // namespace arrow
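
The dense/sparse pair of tests above encodes the two union layouts: dense unions store an offsets buffer into compact children, while sparse unions keep every child aligned with the union's length (hence the null padding in the expected children). A minimal usage sketch of the dense workflow the tests rely on — Append(type_id) records the type and offset, then the matching child builder receives the value; MakeDenseUnionExample is an illustrative name:

#include <memory>

#include "arrow/api.h"

arrow::Status MakeDenseUnionExample(std::shared_ptr<arrow::UnionArray>* out) {
  arrow::DenseUnionBuilder builder(arrow::default_memory_pool());
  auto int_builder = std::make_shared<arrow::Int8Builder>();
  auto str_builder = std::make_shared<arrow::StringBuilder>();
  const int8_t kInt = builder.AppendChild(int_builder, "i8");
  const int8_t kStr = builder.AppendChild(str_builder, "str");
  ARROW_RETURN_NOT_OK(builder.Append(kInt));   // records type id + child offset
  ARROW_RETURN_NOT_OK(int_builder->Append(33));
  ARROW_RETURN_NOT_OK(builder.Append(kStr));
  ARROW_RETURN_NOT_OK(str_builder->Append("abc"));
  return builder.Finish(out);
}
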
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 0f63aba11d3..0b7d8f170cb 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -31,6 +31,7 @@
 #include "arrow/status.h"
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
+#include "arrow/util/atomic_shared_ptr.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
@@ -385,31 +386,26 @@
 
 BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
 
-void BinaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
-  ARROW_CHECK_EQ(data->buffers.size(), 3);
-  auto value_offsets = data->buffers[1];
-  auto value_data = data->buffers[2];
-  this->Array::SetData(data);
-  raw_data_ = value_data == nullptr ? nullptr : value_data->data();
-  raw_value_offsets_ = value_offsets == nullptr
-                           ? nullptr
-                           : reinterpret_cast<const int32_t*>(value_offsets->data());
-}
-
 BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
-                         const std::shared_ptr<Buffer>& data,
-                         const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
-                         int64_t offset)
-    : BinaryArray(binary(), length, value_offsets, data, null_bitmap, null_count,
-                  offset) {}
-
-BinaryArray::BinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
-                         const std::shared_ptr<Buffer>& value_offsets,
                          const std::shared_ptr<Buffer>& data,
                          const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
                          int64_t offset) {
-  SetData(ArrayData::Make(type, length, {null_bitmap, value_offsets, data}, null_count,
-                          offset));
+  SetData(ArrayData::Make(binary(), length, {null_bitmap, value_offsets, data},
+                          null_count, offset));
+}
+
+LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
+  ARROW_CHECK_EQ(data->type->id(), Type::LARGE_BINARY);
+  SetData(data);
+}
+
+LargeBinaryArray::LargeBinaryArray(int64_t length,
+                                   const std::shared_ptr<Buffer>& value_offsets,
+                                   const std::shared_ptr<Buffer>& data,
+                                   const std::shared_ptr<Buffer>& null_bitmap,
+                                   int64_t null_count, int64_t offset) {
+  SetData(ArrayData::Make(large_binary(), length, {null_bitmap, value_offsets, data},
+                          null_count, offset));
 }
 
 StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
@@ -420,8 +416,24 @@ StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
 StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
                          const std::shared_ptr<Buffer>& data,
                          const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
-                         int64_t offset)
-    : BinaryArray(utf8(), length, value_offsets, data, null_bitmap, null_count, offset) {}
+                         int64_t offset) {
+  SetData(ArrayData::Make(utf8(), length, {null_bitmap, value_offsets, data}, null_count,
+                          offset));
+}
+
+LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
+  ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
+  SetData(data);
+}
+
+LargeStringArray::LargeStringArray(int64_t length,
+                                   const std::shared_ptr<Buffer>& value_offsets,
+                                   const std::shared_ptr<Buffer>& data,
+                                   const std::shared_ptr<Buffer>& null_bitmap,
+                                   int64_t null_count, int64_t offset) {
+  SetData(ArrayData::Make(large_utf8(), length, {null_bitmap, value_offsets, data},
+                          null_count, offset));
+}
 
 // ----------------------------------------------------------------------
 // Fixed width binary
@@ -530,7 +542,8 @@ const StructType* StructArray::struct_type() const {
 }
 
 std::shared_ptr<Array> StructArray::field(int i) const {
-  if (!boxed_fields_[i]) {
+  std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
+  if (!result) {
     std::shared_ptr<ArrayData> field_data;
     if (data_->offset != 0 || data_->child_data[i]->length != data_->length) {
       field_data = std::make_shared<ArrayData>(
@@ -538,9 +551,10 @@ std::shared_ptr<Array> StructArray::field(int i) const {
     } else {
       field_data = data_->child_data[i];
     }
-    boxed_fields_[i] = MakeArray(field_data);
+    result = MakeArray(field_data);
+    internal::atomic_store(&boxed_fields_[i], result);
   }
-  return boxed_fields_[i];
+  return result;
 }
 
 std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) const {
@@ -603,6 +617,7 @@ void UnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
   ARROW_CHECK_EQ(data->type->id(), Type::UNION);
   ARROW_CHECK_EQ(data->buffers.size(), 3);
 
+  union_type_ = checked_cast<const UnionType*>(data_->type.get());
   auto type_ids = data_->buffers[1];
   auto value_offsets = data_->buffers[2];
@@ -709,7 +724,8 @@ Status UnionArray::MakeSparse(const Array& type_ids,
 }
 
 std::shared_ptr<Array> UnionArray::child(int i) const {
-  if (!boxed_fields_[i]) {
+  std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
+  if (!result) {
     std::shared_ptr<ArrayData> child_data = data_->child_data[i]->Copy();
     if (mode() == UnionMode::SPARSE) {
       // Sparse union: need to adjust child if union is sliced
@@ -719,16 +735,10 @@ std::shared_ptr<Array> UnionArray::child(int i) const {
       *child_data = child_data->Slice(data_->offset, data_->length);
       }
     }
-    boxed_fields_[i] = MakeArray(child_data);
+    result = MakeArray(child_data);
+    internal::atomic_store(&boxed_fields_[i], result);
   }
-  return boxed_fields_[i];
-}
-
-const Array* UnionArray::UnsafeChild(int i) const {
-  if (!boxed_fields_[i]) {
-    boxed_fields_[i] = MakeArray(data_->child_data[i]);
-  }
-  return boxed_fields_[i].get();
+  return result;
 }
 
 // ----------------------------------------------------------------------
@@ -1149,20 +1159,14 @@ struct ValidateVisitor {
     return ValidateOffsets(array);
   }
 
-  Status Visit(const ListArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    auto value_offsets = array.value_offsets();
-    if (array.length() && !value_offsets) {
-      return Status::Invalid("value_offsets_ was null");
-    }
-    if (value_offsets->size() / static_cast<int64_t>(sizeof(int32_t)) < array.length()) {
-      return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
-                             " isn't large enough for length: ", array.length());
+  Status Visit(const LargeBinaryArray& array) {
+    if (array.data()->buffers.size() != 3) {
+      return Status::Invalid("number of buffers was != 3");
     }
+    return ValidateOffsets(array);
+  }
 
+  Status Visit(const ListArray& array) {
     if (!array.values()) {
       return Status::Invalid("values was null");
     }
@@ -1182,19 +1186,6 @@ struct ValidateVisitor {
   }
 
   Status Visit(const MapArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    auto value_offsets = array.value_offsets();
-    if (array.length() && !value_offsets) {
-      return Status::Invalid("value_offsets_ was null");
-    }
-    if (value_offsets->size() / static_cast<int64_t>(sizeof(int32_t)) < array.length()) {
-      return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
-                             " isn't large enough for length: ", array.length());
-    }
-
     if (!array.keys()) {
       return Status::Invalid("keys was null");
     }
@@ -1225,9 +1216,6 @@ struct ValidateVisitor {
   }
 
   Status Visit(const FixedSizeListArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
     if (!array.values()) {
       return Status::Invalid("values was null");
     }
@@ -1241,14 +1229,6 @@ struct ValidateVisitor {
   }
 
   Status Visit(const StructArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    if (array.null_count() > array.length()) {
-      return Status::Invalid("Null count exceeds the length of this struct");
-    }
-
     if (array.num_fields() > 0) {
       // Validate fields
       int64_t array_length = array.field(0)->length();
@@ -1275,16 +1255,7 @@ struct ValidateVisitor {
     return Status::OK();
   }
 
-  Status Visit(const UnionArray& array) {
-    if (array.length() < 0) {
-      return Status::Invalid("Length was negative");
-    }
-
-    if (array.null_count() > array.length()) {
-      return Status::Invalid("Null count exceeds the length of this struct");
-    }
-    return Status::OK();
-  }
+  Status Visit(const UnionArray& array) { return Status::OK(); }
 
   Status Visit(const DictionaryArray& array) {
     Type::type index_type_id = array.indices()->type()->id();
@@ -1311,12 +1282,23 @@ struct ValidateVisitor {
  protected:
   template <typename ArrayType>
   Status ValidateOffsets(ArrayType& array) {
-    int32_t prev_offset = array.value_offset(0);
+    using offset_type = typename ArrayType::offset_type;
+
+    auto value_offsets = array.value_offsets();
+    if (array.length() && !value_offsets) {
+      return Status::Invalid("value_offsets_ was null");
+    }
+    if (value_offsets->size() / static_cast<int64_t>(sizeof(offset_type)) <
+        array.length()) {
+      return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
+                             " isn't large enough for length: ", array.length());
+    }
+
+    auto prev_offset = array.value_offset(0);
     if (array.offset() == 0 && prev_offset != 0) {
       return Status::Invalid("The first offset wasn't zero");
     }
     for (int64_t i = 1; i <= array.length(); ++i) {
-      int32_t current_offset = array.value_offset(i);
+      auto current_offset = array.value_offset(i);
       if (array.IsNull(i - 1) && current_offset != prev_offset) {
         return Status::Invalid("Offset invariant failure at: ", i,
                                " inconsistent value_offsets for null slot",
@@ -1341,6 +1323,14 @@ Status ValidateArray(const Array& array) {
   const auto layout = type.layout();
   const ArrayData& data = *array.data();
 
+  if (array.length() < 0) {
+    return Status::Invalid("Array length is negative");
+  }
+
+  if (array.null_count() > array.length()) {
+    return Status::Invalid("Null count exceeds array length");
+  }
+
   if (data.buffers.size() != layout.bit_widths.size()) {
     return Status::Invalid("Expected ", layout.bit_widths.size(),
                            " buffers in array "
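
The StructArray::field and UnionArray::child changes above address the thread-safety bug tracked as ARROW-5775 ("cached boxed fields not thread-safe" in the changelog): the lazily boxed child array is now only ever read and written through atomic shared_ptr operations. A generic sketch of the pattern, using the standard C++11 free functions that arrow/util/atomic_shared_ptr.h wraps (LoadOrCompute is an illustrative name):

#include <memory>  // std::atomic_load / std::atomic_store for shared_ptr

// Racing threads may both compute the boxed value, but the cache slot is
// only touched through atomic loads/stores, so a reader never observes a
// half-written shared_ptr.
template <typename T, typename Factory>
std::shared_ptr<T> LoadOrCompute(std::shared_ptr<T>* slot, Factory&& make) {
  std::shared_ptr<T> result = std::atomic_load(slot);
  if (!result) {
    result = make();                  // idempotent; duplicate work is harmless
    std::atomic_store(slot, result);  // last writer wins
  }
  return result;
}
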
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 2a1ce7aae6b..e13088c65c7 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -492,6 +492,7 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
 class ARROW_EXPORT ListArray : public Array {
  public:
   using TypeClass = ListType;
+  using offset_type = ListType::offset_type;
 
   explicit ListArray(const std::shared_ptr<ArrayData>& data);
 
@@ -635,24 +636,20 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
 // ----------------------------------------------------------------------
 // Binary and String
 
-/// Concrete Array class for variable-size binary data
-class ARROW_EXPORT BinaryArray : public FlatArray {
+/// Base class for variable-sized binary arrays, regardless of offset size
+/// and logical interpretation.
+template <typename TYPE>
+class BaseBinaryArray : public FlatArray {
  public:
-  using TypeClass = BinaryType;
-
-  explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
-
-  BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
-              const std::shared_ptr<Buffer>& data,
-              const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
-              int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+  using TypeClass = TYPE;
+  using offset_type = typename TypeClass::offset_type;
 
   /// Return the pointer to the given elements bytes
   // XXX should GetValue(int64_t i) return a string_view?
-  const uint8_t* GetValue(int64_t i, int32_t* out_length) const {
+  const uint8_t* GetValue(int64_t i, offset_type* out_length) const {
     // Account for base offset
     i += data_->offset;
-    const int32_t pos = raw_value_offsets_[i];
+    const offset_type pos = raw_value_offsets_[i];
     *out_length = raw_value_offsets_[i + 1] - pos;
     return raw_data_ + pos;
   }
@@ -664,7 +661,7 @@ class ARROW_EXPORT BinaryArray : public FlatArray {
   util::string_view GetView(int64_t i) const {
     // Account for base offset
     i += data_->offset;
-    const int32_t pos = raw_value_offsets_[i];
+    const offset_type pos = raw_value_offsets_[i];
     return util::string_view(reinterpret_cast<const char*>(raw_data_ + pos),
                              raw_value_offsets_[i + 1] - pos);
   }
@@ -681,31 +678,52 @@ class ARROW_EXPORT BinaryArray : public FlatArray {
   /// Note that this buffer does not account for any slice offset
   std::shared_ptr<Buffer> value_data() const { return data_->buffers[2]; }
 
-  const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
+  const offset_type* raw_value_offsets() const {
+    return raw_value_offsets_ + data_->offset;
+  }
 
   // Neither of these functions will perform boundschecking
-  int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; }
-  int32_t value_length(int64_t i) const {
+  offset_type value_offset(int64_t i) const {
+    return raw_value_offsets_[i + data_->offset];
+  }
+  offset_type value_length(int64_t i) const {
     i += data_->offset;
     return raw_value_offsets_[i + 1] - raw_value_offsets_[i];
   }
 
  protected:
   // For subclasses
-  BinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {}
+  BaseBinaryArray() : raw_value_offsets_(NULLPTR), raw_data_(NULLPTR) {}
 
-  /// Protected method for constructors
-  void SetData(const std::shared_ptr<ArrayData>& data);
+  // Protected method for constructors
+  void SetData(const std::shared_ptr<ArrayData>& data) {
+    auto value_offsets = data->buffers[1];
+    auto value_data = data->buffers[2];
+    this->Array::SetData(data);
+    raw_data_ = value_data == NULLPTR ? NULLPTR : value_data->data();
+    raw_value_offsets_ =
+        value_offsets == NULLPTR
+            ? NULLPTR
+            : reinterpret_cast<const offset_type*>(value_offsets->data());
+  }
+
+  const offset_type* raw_value_offsets_;
+  const uint8_t* raw_data_;
+};
 
-  // Constructor to allow sub-classes/builders to substitute their own logical type
-  BinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
-              const std::shared_ptr<Buffer>& value_offsets,
+/// Concrete Array class for variable-size binary data
+class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
+ public:
+  explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
+
+  BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
               const std::shared_ptr<Buffer>& data,
               const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
               int64_t null_count = kUnknownNullCount, int64_t offset = 0);
 
-  const int32_t* raw_value_offsets_;
-  const uint8_t* raw_data_;
+ protected:
+  // For subclasses such as StringArray
+  BinaryArray() : BaseBinaryArray() {}
 };
 
 /// Concrete Array class for variable-size string (utf-8) data
@@ -721,6 +739,34 @@ class ARROW_EXPORT StringArray : public BinaryArray {
                int64_t null_count = kUnknownNullCount, int64_t offset = 0);
 };
 
+/// Concrete Array class for large variable-size binary data
+class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
+ public:
+  explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
+
+  LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+                   const std::shared_ptr<Buffer>& data,
+                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
+ protected:
+  // For subclasses such as LargeStringArray
+  LargeBinaryArray() : BaseBinaryArray() {}
+};
+
+/// Concrete Array class for large variable-size string (utf-8) data
+class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
+ public:
+  using TypeClass = LargeStringType;
+
+  explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
+
+  LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
+                   const std::shared_ptr<Buffer>& data,
+                   const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
+                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+};
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 
@@ -1032,9 +1078,9 @@ class ARROW_EXPORT UnionArray : public Array {
   const type_id_t* raw_type_ids() const { return raw_type_ids_ + data_->offset; }
   const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; }
 
-  UnionMode::type mode() const {
-    return internal::checked_cast<const UnionType&>(*type()).mode();
-  }
+  const UnionType* union_type() const { return union_type_; }
+
+  UnionMode::type mode() const { return union_type_->mode(); }
 
   // Return the given field as an individual array.
   // For sparse unions, the returned array has its offset, length and null
   // count adjusted.
   // For dense unions, the returned array is unchanged.
   std::shared_ptr<Array> child(int pos) const;
 
-  /// Only use this while the UnionArray is in scope
-  const Array* UnsafeChild(int pos) const;
-
  protected:
   void SetData(const std::shared_ptr<ArrayData>& data);
 
   const type_id_t* raw_type_ids_;
   const int32_t* raw_value_offsets_;
+  const UnionType* union_type_;
 
   // For caching boxed child data
   mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
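
Because BaseBinaryArray derives offset_type from its type parameter, the same accessor code compiles against both 32-bit and 64-bit offset arrays. A short read-side sketch, assuming only the accessors declared above (PrintValues is an illustrative helper, not part of the patch):

#include <cstdint>
#include <iostream>

#include "arrow/api.h"

// Works for arrow::StringArray (int32_t offsets) and arrow::LargeStringArray
// (int64_t offsets) alike; offset_type comes from the template parameter.
template <typename ArrayType>
void PrintValues(const ArrayType& array) {
  for (int64_t i = 0; i < array.length(); ++i) {
    if (array.IsNull(i)) {
      std::cout << "null\n";
    } else {
      typename ArrayType::offset_type length = array.value_length(i);
      std::cout << array.GetString(i) << " (" << length << " bytes)\n";
    }
  }
}
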
diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc
index 818ad155996..b83897d7e19 100644
--- a/cpp/src/arrow/array/builder_binary.cc
+++ b/cpp/src/arrow/array/builder_binary.cc
@@ -43,173 +43,15 @@ using internal::checked_cast;
 // ----------------------------------------------------------------------
 // String and binary
 
-BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
-    : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
-
-BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
-
-Status BinaryBuilder::Resize(int64_t capacity) {
-  if (capacity > kListMaximumElements) {
-    return Status::CapacityError(
-        "BinaryBuilder cannot reserve space for more then 2^31 - 1 child elements, got ",
-        capacity);
-  }
-  RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
-
-  // one more then requested for offsets
-  RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
-  return ArrayBuilder::Resize(capacity);
-}
-
-Status BinaryBuilder::ReserveData(int64_t elements) {
-  const int64_t size = value_data_length() + elements;
-  ARROW_RETURN_IF(
-      size > kBinaryMemoryLimit,
-      Status::CapacityError("Cannot reserve capacity larger than 2^31 - 1 for binary"));
-
-  return (size > value_data_capacity()) ? value_data_builder_.Reserve(elements)
-                                        : Status::OK();
-}
-
-Status BinaryBuilder::AppendOverflow(int64_t num_bytes) {
-  return Status::CapacityError("BinaryArray cannot contain more than ",
-                               kBinaryMemoryLimit, " bytes, have ", num_bytes);
-}
-
-Status BinaryBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  // Write final offset (values length)
-  RETURN_NOT_OK(AppendNextOffset());
-
-  // These buffers' padding zeroed by BufferBuilder
-  std::shared_ptr<Buffer> offsets, value_data, null_bitmap;
-  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
-  RETURN_NOT_OK(value_data_builder_.Finish(&value_data));
-  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
-
-  *out =
-      ArrayData::Make(type_, length_, {null_bitmap, offsets, value_data}, null_count_, 0);
-  Reset();
-  return Status::OK();
-}
-
-void BinaryBuilder::Reset() {
-  ArrayBuilder::Reset();
-  offsets_builder_.Reset();
-  value_data_builder_.Reset();
-}
-
-const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
-  const int32_t* offsets = offsets_builder_.data();
-  int32_t offset = offsets[i];
-  if (i == (length_ - 1)) {
-    *out_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
-  } else {
-    *out_length = offsets[i + 1] - offset;
-  }
-  return value_data_builder_.data() + offset;
-}
-
-util::string_view BinaryBuilder::GetView(int64_t i) const {
-  const int32_t* offsets = offsets_builder_.data();
-  int32_t offset = offsets[i];
-  int32_t value_length;
-  if (i == (length_ - 1)) {
-    value_length = static_cast<int32_t>(value_data_builder_.length()) - offset;
-  } else {
-    value_length = offsets[i + 1] - offset;
-  }
-  return util::string_view(
-      reinterpret_cast<const char*>(value_data_builder_.data() + offset), value_length);
-}
+BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BaseBinaryBuilder(binary(), pool) {}
 
 StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
 
-Status StringBuilder::AppendValues(const std::vector<std::string>& values,
-                                   const uint8_t* valid_bytes) {
-  std::size_t total_length = std::accumulate(
-      values.begin(), values.end(), 0ULL,
-      [](uint64_t sum, const std::string& str) { return sum + str.size(); });
-  RETURN_NOT_OK(Reserve(values.size()));
-  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
-  RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
-
-  if (valid_bytes) {
-    for (std::size_t i = 0; i < values.size(); ++i) {
-      UnsafeAppendNextOffset();
-      if (valid_bytes[i]) {
-        value_data_builder_.UnsafeAppend(
-            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
-      }
-    }
-  } else {
-    for (std::size_t i = 0; i < values.size(); ++i) {
-      UnsafeAppendNextOffset();
-      value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i].data()),
-                                       values[i].size());
-    }
-  }
+LargeBinaryBuilder::LargeBinaryBuilder(MemoryPool* pool)
+    : BaseBinaryBuilder(large_binary(), pool) {}
 
-  UnsafeAppendToBitmap(valid_bytes, values.size());
-  return Status::OK();
-}
-
-Status StringBuilder::AppendValues(const char** values, int64_t length,
-                                   const uint8_t* valid_bytes) {
-  std::size_t total_length = 0;
-  std::vector<std::size_t> value_lengths(length);
-  bool have_null_value = false;
-  for (int64_t i = 0; i < length; ++i) {
-    if (values[i]) {
-      auto value_length = strlen(values[i]);
-      value_lengths[i] = value_length;
-      total_length += value_length;
-    } else {
-      have_null_value = true;
-    }
-  }
-  RETURN_NOT_OK(Reserve(length));
-  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
-  RETURN_NOT_OK(offsets_builder_.Reserve(length));
-
-  if (valid_bytes) {
-    int64_t valid_bytes_offset = 0;
-    for (int64_t i = 0; i < length; ++i) {
-      UnsafeAppendNextOffset();
-      if (valid_bytes[i]) {
-        if (values[i]) {
-          value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
-                                           value_lengths[i]);
-        } else {
-          UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - valid_bytes_offset);
-          UnsafeAppendToBitmap(false);
-          valid_bytes_offset = i + 1;
-        }
-      }
-    }
-    UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
-  } else {
-    if (have_null_value) {
-      std::vector<uint8_t> valid_vector(length, 0);
-      for (int64_t i = 0; i < length; ++i) {
-        UnsafeAppendNextOffset();
-        if (values[i]) {
-          value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
-                                           value_lengths[i]);
-          valid_vector[i] = 1;
-        }
-      }
-      UnsafeAppendToBitmap(valid_vector.data(), length);
-    } else {
-      for (int64_t i = 0; i < length; ++i) {
-        UnsafeAppendNextOffset();
-        value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
-                                         value_lengths[i]);
-      }
-      UnsafeAppendToBitmap(nullptr, length);
-    }
-  }
-  return Status::OK();
-}
+LargeStringBuilder::LargeStringBuilder(MemoryPool* pool)
+    : LargeBinaryBuilder(large_utf8(), pool) {}
 
 // ----------------------------------------------------------------------
 // Fixed width binary
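
The new builders are drop-in analogues of the 32-bit ones. A minimal usage sketch assuming only the constructors and Append methods declared in this patch (MakeLargeStringArray is an illustrative name):

#include <memory>

#include "arrow/api.h"

// LargeStringBuilder mirrors StringBuilder but produces 64-bit offsets,
// so the values buffer may exceed 2^31 - 1 bytes.
arrow::Status MakeLargeStringArray(std::shared_ptr<arrow::Array>* out) {
  arrow::LargeStringBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("bow"));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  ARROW_RETURN_NOT_OK(builder.Append("arrow"));
  return builder.Finish(out);  // yields an array of type large_utf8()
}
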
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index 47d3bae4b89..5bf4e747b52 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -17,8 +17,11 @@
 
 #pragma once
 
+#include <cstddef>
+#include <cstring>
 #include <limits>
 #include <memory>
+#include <numeric>
 #include <string>
 #include <vector>
@@ -37,15 +40,16 @@
 constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;
 
 // ----------------------------------------------------------------------
 // Binary and String
 
-/// \class BinaryBuilder
-/// \brief Builder class for variable-length binary data
-class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
+template <typename TYPE>
+class BaseBinaryBuilder : public ArrayBuilder {
  public:
-  explicit BinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+  using TypeClass = TYPE;
+  using offset_type = typename TypeClass::offset_type;
 
-  BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
+  BaseBinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
+      : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
 
-  Status Append(const uint8_t* value, int32_t length) {
+  Status Append(const uint8_t* value, offset_type length) {
     ARROW_RETURN_NOT_OK(Reserve(1));
     ARROW_RETURN_NOT_OK(AppendNextOffset());
     // Safety check for UBSAN.
@@ -57,14 +61,22 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
+  Status Append(const char* value, offset_type length) {
+    return Append(reinterpret_cast<const uint8_t*>(value), length);
+  }
+
+  Status Append(util::string_view value) {
+    return Append(value.data(), static_cast<offset_type>(value.size()));
+  }
+
   Status AppendNulls(int64_t length) final {
     const int64_t num_bytes = value_data_builder_.length();
-    if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+    if (ARROW_PREDICT_FALSE(num_bytes > memory_limit())) {
       return AppendOverflow(num_bytes);
     }
     ARROW_RETURN_NOT_OK(Reserve(length));
     for (int64_t i = 0; i < length; ++i) {
-      offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+      offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
     }
     UnsafeAppendToBitmap(length, false);
     return Status::OK();
@@ -77,56 +89,184 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
-  Status Append(const char* value, int32_t length) {
-    return Append(reinterpret_cast<const uint8_t*>(value), length);
-  }
-
-  Status Append(util::string_view value) {
-    return Append(value.data(), static_cast<int32_t>(value.size()));
-  }
-
   /// \brief Append without checking capacity
   ///
   /// Offsets and data should have been presized using Reserve() and
   /// ReserveData(), respectively.
-  void UnsafeAppend(const uint8_t* value, int32_t length) {
+  void UnsafeAppend(const uint8_t* value, offset_type length) {
     UnsafeAppendNextOffset();
     value_data_builder_.UnsafeAppend(value, length);
     UnsafeAppendToBitmap(true);
   }
 
-  void UnsafeAppend(const char* value, int32_t length) {
+  void UnsafeAppend(const char* value, offset_type length) {
     UnsafeAppend(reinterpret_cast<const uint8_t*>(value), length);
   }
 
   void UnsafeAppend(const std::string& value) {
-    UnsafeAppend(value.c_str(), static_cast<int32_t>(value.size()));
+    UnsafeAppend(value.c_str(), static_cast<offset_type>(value.size()));
   }
 
   void UnsafeAppend(util::string_view value) {
-    UnsafeAppend(value.data(), static_cast<int32_t>(value.size()));
+    UnsafeAppend(value.data(), static_cast<offset_type>(value.size()));
   }
 
   void UnsafeAppendNull() {
     const int64_t num_bytes = value_data_builder_.length();
-    offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+    offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
     UnsafeAppendToBitmap(false);
   }
 
-  void Reset() override;
-  Status Resize(int64_t capacity) override;
+  /// \brief Append a sequence of strings in one shot.
+  ///
+  /// \param[in] values a vector of strings
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status
+  Status AppendValues(const std::vector<std::string>& values,
+                      const uint8_t* valid_bytes = NULLPTR) {
+    std::size_t total_length = std::accumulate(
+        values.begin(), values.end(), 0ULL,
+        [](uint64_t sum, const std::string& str) { return sum + str.size(); });
+    ARROW_RETURN_NOT_OK(Reserve(values.size()));
+    ARROW_RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
+
+    if (valid_bytes) {
+      for (std::size_t i = 0; i < values.size(); ++i) {
+        UnsafeAppendNextOffset();
+        if (valid_bytes[i]) {
+          value_data_builder_.UnsafeAppend(
+              reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
+        }
+      }
+    } else {
+      for (std::size_t i = 0; i < values.size(); ++i) {
+        UnsafeAppendNextOffset();
+        value_data_builder_.UnsafeAppend(
+            reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size());
+      }
+    }
+
+    UnsafeAppendToBitmap(valid_bytes, values.size());
+    return Status::OK();
+  }
+
+  /// \brief Append a sequence of nul-terminated strings in one shot.
+  /// If one of the values is NULL, it is processed as a null
+  /// value even if the corresponding valid_bytes entry is 1.
+  ///
+  /// \param[in] values a contiguous C array of nul-terminated char *
+  /// \param[in] length the number of values to append
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status
+  Status AppendValues(const char** values, int64_t length,
+                      const uint8_t* valid_bytes = NULLPTR) {
+    std::size_t total_length = 0;
+    std::vector<std::size_t> value_lengths(length);
+    bool have_null_value = false;
+    for (int64_t i = 0; i < length; ++i) {
+      if (values[i]) {
+        auto value_length = strlen(values[i]);
+        value_lengths[i] = value_length;
+        total_length += value_length;
+      } else {
+        have_null_value = true;
+      }
+    }
+    ARROW_RETURN_NOT_OK(Reserve(length));
+    ARROW_RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(length));
+
+    if (valid_bytes) {
+      int64_t valid_bytes_offset = 0;
+      for (int64_t i = 0; i < length; ++i) {
+        UnsafeAppendNextOffset();
+        if (valid_bytes[i]) {
+          if (values[i]) {
+            value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+                                             value_lengths[i]);
+          } else {
+            UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset,
+                                 i - valid_bytes_offset);
+            UnsafeAppendToBitmap(false);
+            valid_bytes_offset = i + 1;
+          }
+        }
+      }
+      UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - valid_bytes_offset);
+    } else {
+      if (have_null_value) {
+        std::vector<uint8_t> valid_vector(length, 0);
+        for (int64_t i = 0; i < length; ++i) {
+          UnsafeAppendNextOffset();
+          if (values[i]) {
+            value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
+                                             value_lengths[i]);
+            valid_vector[i] = 1;
+          }
+        }
+        UnsafeAppendToBitmap(valid_vector.data(), length);
+      } else {
+        for (int64_t i = 0; i < length; ++i) {
+          UnsafeAppendNextOffset();
+          value_data_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values[i]),
                                            value_lengths[i]);
+        }
+        UnsafeAppendToBitmap(NULLPTR, length);
+      }
+    }
+    return Status::OK();
+  }
+
+  void Reset() override {
+    ArrayBuilder::Reset();
+    offsets_builder_.Reset();
+    value_data_builder_.Reset();
+  }
+
+  Status Resize(int64_t capacity) override {
+    if (capacity > kListMaximumElements) {
+      return Status::CapacityError(
+          "BinaryBuilder cannot reserve space for more than 2^31 - 1 child elements, "
+ "got ", + capacity); + } + ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_)); + + // One more than requested for offsets + ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1)); + return ArrayBuilder::Resize(capacity); + } /// \brief Ensures there is enough allocated capacity to append the indicated /// number of bytes to the value data buffer without additional allocations - Status ReserveData(int64_t elements); + Status ReserveData(int64_t elements) { + const int64_t size = value_data_length() + elements; + ARROW_RETURN_IF(size > memory_limit(), + Status::CapacityError("Cannot reserve capacity larger than ", + memory_limit(), " bytes")); + + return (size > value_data_capacity()) ? value_data_builder_.Reserve(elements) + : Status::OK(); + } - Status FinishInternal(std::shared_ptr* out) override; + Status FinishInternal(std::shared_ptr* out) override { + // Write final offset (values length) + ARROW_RETURN_NOT_OK(AppendNextOffset()); - /// \cond FALSE - using ArrayBuilder::Finish; - /// \endcond + // These buffers' padding zeroed by BufferBuilder + std::shared_ptr offsets, value_data, null_bitmap; + ARROW_RETURN_NOT_OK(offsets_builder_.Finish(&offsets)); + ARROW_RETURN_NOT_OK(value_data_builder_.Finish(&value_data)); + ARROW_RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap)); - Status Finish(std::shared_ptr* out) { return FinishTyped(out); } + *out = ArrayData::Make(type_, length_, {null_bitmap, offsets, value_data}, + null_count_, 0); + Reset(); + return Status::OK(); + } /// \return size of values buffer so far int64_t value_data_length() const { return value_data_builder_.length(); } @@ -136,33 +276,77 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder { /// Temporary access to a value. /// /// This pointer becomes invalid on the next modifying operation. - const uint8_t* GetValue(int64_t i, int32_t* out_length) const; + const uint8_t* GetValue(int64_t i, offset_type* out_length) const { + const offset_type* offsets = offsets_builder_.data(); + const auto offset = offsets[i]; + if (i == (length_ - 1)) { + *out_length = static_cast(value_data_builder_.length()) - offset; + } else { + *out_length = offsets[i + 1] - offset; + } + return value_data_builder_.data() + offset; + } /// Temporary access to a value. /// /// This view becomes invalid on the next modifying operation. 
-  util::string_view GetView(int64_t i) const;
+  util::string_view GetView(int64_t i) const {
+    const offset_type* offsets = offsets_builder_.data();
+    const auto offset = offsets[i];
+    offset_type value_length;
+    if (i == (length_ - 1)) {
+      value_length = static_cast<offset_type>(value_data_builder_.length()) - offset;
+    } else {
+      value_length = offsets[i + 1] - offset;
+    }
+    return util::string_view(
+        reinterpret_cast<const char*>(value_data_builder_.data() + offset), value_length);
+  }
 
  protected:
-  TypedBufferBuilder<int32_t> offsets_builder_;
+  TypedBufferBuilder<offset_type> offsets_builder_;
   TypedBufferBuilder<uint8_t> value_data_builder_;
 
-  Status AppendOverflow(int64_t num_bytes);
+  Status AppendOverflow(int64_t num_bytes) {
+    return Status::CapacityError("array cannot contain more than ", memory_limit(),
                                  " bytes, have ", num_bytes);
+  }
 
   Status AppendNextOffset() {
     const int64_t num_bytes = value_data_builder_.length();
-    if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
+    if (ARROW_PREDICT_FALSE(num_bytes > memory_limit())) {
       return AppendOverflow(num_bytes);
     }
-    return offsets_builder_.Append(static_cast<int32_t>(num_bytes));
+    return offsets_builder_.Append(static_cast<offset_type>(num_bytes));
   }
 
   void UnsafeAppendNextOffset() {
     const int64_t num_bytes = value_data_builder_.length();
-    offsets_builder_.UnsafeAppend(static_cast<int32_t>(num_bytes));
+    offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
+  }
+
+  // Cannot make this a static attribute because of linking issues
+  static constexpr int64_t memory_limit() {
+    return std::numeric_limits<offset_type>::max() - 1;
   }
 };
 
+/// \class BinaryBuilder
+/// \brief Builder class for variable-length binary data
+class ARROW_EXPORT BinaryBuilder : public BaseBinaryBuilder<BinaryType> {
+ public:
+  explicit BinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+
+  /// \cond FALSE
+  using ArrayBuilder::Finish;
+  /// \endcond
+
+  Status Finish(std::shared_ptr<BinaryArray>* out) { return FinishTyped(out); }
+
+ protected:
+  using BaseBinaryBuilder::BaseBinaryBuilder;
+};
+
 /// \class StringBuilder
 /// \brief Builder class for UTF8 strings
 class ARROW_EXPORT StringBuilder : public BinaryBuilder {
@@ -170,36 +354,41 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
   using BinaryBuilder::BinaryBuilder;
   explicit StringBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
 
-  using BinaryBuilder::Append;
-  using BinaryBuilder::Reset;
-  using BinaryBuilder::UnsafeAppend;
+  /// \cond FALSE
+  using ArrayBuilder::Finish;
+  /// \endcond
 
-  /// \brief Append a sequence of strings in one shot.
-  ///
-  /// \param[in] values a vector of strings
-  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
-  /// indicates a valid (non-null) value
-  /// \return Status
-  Status AppendValues(const std::vector<std::string>& values,
-                      const uint8_t* valid_bytes = NULLPTR);
+  Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
+};
 
-  /// \brief Append a sequence of nul-terminated strings in one shot.
-  /// If one of the values is NULL, it is processed as a null
-  /// value even if the corresponding valid_bytes entry is 1.
-  ///
-  /// \param[in] values a contiguous C array of nul-terminated char *
-  /// \param[in] length the number of values to append
-  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
-  /// indicates a valid (non-null) value
-  /// \return Status
-  Status AppendValues(const char** values, int64_t length,
-                      const uint8_t* valid_bytes = NULLPTR);
+/// \class LargeBinaryBuilder
+/// \brief Builder class for large variable-length binary data
+class ARROW_EXPORT LargeBinaryBuilder : public BaseBinaryBuilder<LargeBinaryType> {
+ public:
+  explicit LargeBinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
 
   /// \cond FALSE
   using ArrayBuilder::Finish;
   /// \endcond
 
-  Status Finish(std::shared_ptr<StringArray>* out) { return FinishTyped(out); }
+  Status Finish(std::shared_ptr<LargeBinaryArray>* out) { return FinishTyped(out); }
+
+ protected:
+  using BaseBinaryBuilder::BaseBinaryBuilder;
+};
+
+/// \class LargeStringBuilder
+/// \brief Builder class for large UTF8 strings
+class ARROW_EXPORT LargeStringBuilder : public LargeBinaryBuilder {
+ public:
+  using LargeBinaryBuilder::LargeBinaryBuilder;
+  explicit LargeStringBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
+
+  /// \cond FALSE
+  using ArrayBuilder::Finish;
+  /// \endcond
+
+  Status Finish(std::shared_ptr<LargeStringArray>* out) { return FinishTyped(out); }
 };
 
 // ----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index 0bf91719dfe..30b3fc05e59 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -60,7 +60,7 @@ Status ListBuilder::CheckNextOffset() const {
   const int64_t num_values = value_builder_->length();
   ARROW_RETURN_IF(
       num_values > kListMaximumElements,
-      Status::CapacityError("ListArray cannot contain more then 2^31 - 1 child elements,",
+      Status::CapacityError("ListArray cannot contain more than 2^31 - 1 child elements,",
                             " have ", num_values));
   return Status::OK();
 }
@@ -300,7 +300,7 @@ Status FixedSizeListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
 // Struct
 
 StructBuilder::StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
-                             std::vector<std::shared_ptr<ArrayBuilder>>&& field_builders)
+                             std::vector<std::shared_ptr<ArrayBuilder>> field_builders)
     : ArrayBuilder(type, pool) {
   children_ = std::move(field_builders);
 }
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index de031459181..8742f2b6e24 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -218,7 +218,7 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
 class ARROW_EXPORT StructBuilder : public ArrayBuilder {
  public:
   StructBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
-                std::vector<std::shared_ptr<ArrayBuilder>>&& field_builders);
+                std::vector<std::shared_ptr<ArrayBuilder>> field_builders);
 
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
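
With the StructBuilder constructor now taking the field builders by value, callers can pass a named vector without std::move (moving still works and avoids the copy). A minimal sketch under that assumption; MakeStructExample is an illustrative name:

#include <memory>
#include <vector>

#include "arrow/api.h"

arrow::Status MakeStructExample(std::shared_ptr<arrow::Array>* out) {
  auto type = arrow::struct_({arrow::field("x", arrow::int64()),
                              arrow::field("y", arrow::utf8())});
  auto x_builder = std::make_shared<arrow::Int64Builder>();
  auto y_builder = std::make_shared<arrow::StringBuilder>();
  std::vector<std::shared_ptr<arrow::ArrayBuilder>> fields = {x_builder, y_builder};
  arrow::StructBuilder builder(type, arrow::default_memory_pool(), fields);
  ARROW_RETURN_NOT_OK(builder.Append());  // open one struct slot
  ARROW_RETURN_NOT_OK(x_builder->Append(1));
  ARROW_RETURN_NOT_OK(y_builder->Append("a"));
  return builder.Finish(out);
}
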
diff --git a/cpp/src/arrow/array/builder_union.cc b/cpp/src/arrow/array/builder_union.cc
index f51b7d7f020..8de786f6afa 100644
--- a/cpp/src/arrow/array/builder_union.cc
+++ b/cpp/src/arrow/array/builder_union.cc
@@ -17,44 +17,101 @@
 
 #include "arrow/array/builder_union.h"
 
+#include <limits>
 #include <utility>
 
+#include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
 
-DenseUnionBuilder::DenseUnionBuilder(MemoryPool* pool,
-                                     const std::shared_ptr<DataType>& type)
-    : ArrayBuilder(type, pool), types_builder_(pool), offsets_builder_(pool) {}
+using internal::checked_cast;
 
-Status DenseUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
-  std::shared_ptr<Buffer> types;
+Status BasicUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
+  std::shared_ptr<Buffer> types, null_bitmap;
+  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
   RETURN_NOT_OK(types_builder_.Finish(&types));
-  std::shared_ptr<Buffer> offsets;
-  RETURN_NOT_OK(offsets_builder_.Finish(&offsets));
-  std::shared_ptr<Buffer> null_bitmap;
-  RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
 
+  // If the type has not been specified in the constructor, gather type_codes
+  std::vector<uint8_t> type_codes;
+  if (type_ == nullptr) {
+    for (size_t i = 0; i < children_.size(); ++i) {
+      if (type_id_to_children_[i] != nullptr) {
+        type_codes.push_back(static_cast<uint8_t>(i));
+      }
+    }
+  } else {
+    type_codes = checked_cast<const UnionType&>(*type_).type_codes();
+  }
+  DCHECK_EQ(type_codes.size(), children_.size());
 
-  std::vector<std::shared_ptr<Field>> fields;
   std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
-  std::vector<uint8_t> type_ids;
   for (size_t i = 0; i < children_.size(); ++i) {
-    std::shared_ptr<ArrayData> data;
-    RETURN_NOT_OK(children_[i]->FinishInternal(&data));
-    child_data[i] = data;
-    fields.push_back(field(field_names_[i], children_[i]->type()));
-    type_ids.push_back(static_cast<uint8_t>(i));
+    RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
   }
 
   // If the type has not been specified in the constructor, infer it
-  if (!type_) {
-    type_ = union_(fields, type_ids, UnionMode::DENSE);
+  if (type_ == nullptr) {
+    std::vector<std::shared_ptr<Field>> fields;
+    auto field_names_it = field_names_.begin();
+    for (auto&& data : child_data) {
+      fields.push_back(field(*field_names_it++, data->type));
+    }
+    type_ = union_(fields, type_codes, mode_);
   }
 
-  *out = ArrayData::Make(type_, length(), {null_bitmap, types, offsets}, null_count_);
+  *out = ArrayData::Make(type_, length(), {null_bitmap, types, nullptr}, null_count_);
   (*out)->child_data = std::move(child_data);
   return Status::OK();
 }
 
+BasicUnionBuilder::BasicUnionBuilder(
+    MemoryPool* pool, UnionMode::type mode,
+    const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+    const std::shared_ptr<DataType>& type)
+    : ArrayBuilder(type, pool), mode_(mode), types_builder_(pool) {
+  auto union_type = checked_cast<const UnionType*>(type.get());
+  DCHECK_NE(union_type, nullptr);
+  DCHECK_EQ(union_type->mode(), mode);
+
+  children_ = children;
+  type_id_to_children_.resize(union_type->max_type_code() + 1, nullptr);
+  DCHECK_LT(type_id_to_children_.size(), std::numeric_limits<int8_t>::max());
+
+  auto field_it = type->children().begin();
+  auto children_it = children.begin();
+  for (auto type_id : union_type->type_codes()) {
+    type_id_to_children_[type_id] = *children_it++;
+    field_names_.push_back((*field_it++)->name());
+  }
+  DCHECK_EQ(children_it, children.end());
+  DCHECK_EQ(field_it, type->children().end());
+}
+
+int8_t BasicUnionBuilder::AppendChild(const std::shared_ptr<ArrayBuilder>& new_child,
+                                      const std::string& field_name) {
+  // force type inference in Finish
+  type_ = nullptr;
+
+  field_names_.push_back(field_name);
+  children_.push_back(new_child);
+
+  // Find type_id such that type_id_to_children_[type_id] == nullptr
+  // and use that for the new child. Start searching at dense_type_id_
+  // since type_id_to_children_ is densely packed at least up to dense_type_id_
+  for (; static_cast<size_t>(dense_type_id_) < type_id_to_children_.size();
+       ++dense_type_id_) {
+    if (type_id_to_children_[dense_type_id_] == nullptr) {
+      type_id_to_children_[dense_type_id_] = new_child;
+      return dense_type_id_++;
+    }
+  }
+
+  DCHECK_LT(type_id_to_children_.size(), std::numeric_limits<int8_t>::max());
+
+  // type_id_to_children_ is already densely packed, so just append the new child
+  type_id_to_children_.push_back(new_child);
+  return dense_type_id_++;
+}
+
 }  // namespace arrow
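
AppendChild both registers the child builder and assigns it the next free type code; when no type was passed to the constructor, FinishInternal infers the union type from the registered children. A minimal sketch of that inference path for a sparse union (MakeInferredUnion is an illustrative name):

#include <memory>

#include "arrow/api.h"

arrow::Status MakeInferredUnion(std::shared_ptr<arrow::UnionArray>* out) {
  arrow::SparseUnionBuilder builder(arrow::default_memory_pool());
  auto i8 = std::make_shared<arrow::Int8Builder>();
  auto str = std::make_shared<arrow::StringBuilder>();
  const int8_t kI8 = builder.AppendChild(i8, "i8");    // assigned type code 0
  const int8_t kStr = builder.AppendChild(str, "str");  // assigned type code 1
  ARROW_RETURN_NOT_OK(builder.Append(kI8));
  ARROW_RETURN_NOT_OK(i8->Append(42));
  ARROW_RETURN_NOT_OK(str->AppendNull());  // sparse: other children stay aligned
  return builder.Finish(out);  // union type inferred from the children
}
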
+  /// You will need to call AppendChild for each of the
+  /// children builders you want to use.
+  explicit DenseUnionBuilder(MemoryPool* pool)
+      : BasicUnionBuilder(pool, UnionMode::DENSE), offsets_builder_(pool) {}
+
+  /// Use this constructor to specify the type explicitly.
+  /// You can still add child builders to the union after using this constructor
+  DenseUnionBuilder(MemoryPool* pool,
+                    const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+                    const std::shared_ptr<DataType>& type)
+      : BasicUnionBuilder(pool, UnionMode::DENSE, children, type),
+        offsets_builder_(pool) {}

   Status AppendNull() final {
     ARROW_RETURN_NOT_OK(types_builder_.Append(0));
@@ -54,53 +95,78 @@ class ARROW_EXPORT DenseUnionBuilder : public ArrayBuilder {
   }

   Status AppendNulls(int64_t length) final {
-    ARROW_RETURN_NOT_OK(types_builder_.Reserve(length));
-    ARROW_RETURN_NOT_OK(offsets_builder_.Reserve(length));
-    ARROW_RETURN_NOT_OK(Reserve(length));
-    for (int64_t i = 0; i < length; ++i) {
-      types_builder_.UnsafeAppend(0);
-      offsets_builder_.UnsafeAppend(0);
-    }
+    ARROW_RETURN_NOT_OK(types_builder_.Append(length, 0));
+    ARROW_RETURN_NOT_OK(offsets_builder_.Append(length, 0));
     return AppendToBitmap(length, false);
   }

   /// \brief Append an element to the UnionArray. This must be followed
   /// by an append to the appropriate child builder.
-  /// \param[in] type index of the child the value will be appended
-  /// \param[in] offset offset of the value in that child
-  Status Append(int8_t type, int32_t offset) {
-    ARROW_RETURN_NOT_OK(types_builder_.Append(type));
+  ///
+  /// \param[in] next_type type_id of the child to which the next value will be
+  /// appended.
+  ///
+  /// The corresponding child builder must be appended to independently after this
+  /// method is called.
+  Status Append(int8_t next_type) {
+    ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
+    if (type_id_to_children_[next_type]->length() == kListMaximumElements) {
+      return Status::CapacityError(
+          "a dense UnionArray cannot contain more than 2^31 - 1 elements from a "
+          "single child");
+    }
+    auto offset = static_cast<int32_t>(type_id_to_children_[next_type]->length());
     ARROW_RETURN_NOT_OK(offsets_builder_.Append(offset));
     return AppendToBitmap(true);
   }

-  Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
+  Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
+    ARROW_RETURN_NOT_OK(BasicUnionBuilder::FinishInternal(out));
+    return offsets_builder_.Finish(&(*out)->buffers[2]);
+  }

-  /// \cond FALSE
-  using ArrayBuilder::Finish;
-  /// \endcond
+ private:
+  TypedBufferBuilder<int32_t> offsets_builder_;
+};

-  Status Finish(std::shared_ptr<UnionArray>* out) { return FinishTyped(out); }
+/// \class SparseUnionBuilder
+///
+/// This API is EXPERIMENTAL.
+class ARROW_EXPORT SparseUnionBuilder : public BasicUnionBuilder {
+ public:
+  /// Use this constructor to initialize the UnionBuilder with no child builders,
+  /// allowing type to be inferred. You will need to call AppendChild for each of the
+  /// children builders you want to use.
+  explicit SparseUnionBuilder(MemoryPool* pool)
+      : BasicUnionBuilder(pool, UnionMode::SPARSE) {}

-  /// \brief Make a new child builder available to the UnionArray
-  ///
-  /// \param[in] child the child builder
-  /// \param[in] field_name the name of the field in the union array type
-  /// if type inference is used
-  /// \return child index, which is the "type" argument that needs
-  /// to be passed to the "Append" method to add a new element to
-  /// the union array.
-  int8_t AppendChild(const std::shared_ptr<ArrayBuilder>& child,
-                     const std::string& field_name = "") {
-    children_.push_back(child);
-    field_names_.push_back(field_name);
-    return static_cast<int8_t>(children_.size() - 1);
-  }
+  /// Use this constructor to specify the type explicitly.
+  /// You can still add child builders to the union after using this constructor
+  SparseUnionBuilder(MemoryPool* pool,
+                     const std::vector<std::shared_ptr<ArrayBuilder>>& children,
+                     const std::shared_ptr<DataType>& type)
+      : BasicUnionBuilder(pool, UnionMode::SPARSE, children, type) {}
+
+  Status AppendNull() final {
+    ARROW_RETURN_NOT_OK(types_builder_.Append(0));
+    return AppendToBitmap(false);
   }

- private:
-  TypedBufferBuilder<int8_t> types_builder_;
-  TypedBufferBuilder<int32_t> offsets_builder_;
-  std::vector<std::string> field_names_;
+  Status AppendNulls(int64_t length) final {
+    ARROW_RETURN_NOT_OK(types_builder_.Append(length, 0));
+    return AppendToBitmap(length, false);
+  }
+
+  /// \brief Append an element to the UnionArray. This must be followed
+  /// by an append to the appropriate child builder.
+  ///
+  /// \param[in] next_type type_id of the child to which the next value will be
+  /// appended.
+  ///
+  /// The corresponding child builder must be appended to independently after this
+  /// method is called, and all other child builders must have null appended
+  Status Append(int8_t next_type) {
+    ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
+    return AppendToBitmap(true);
+  }
 };

 }  // namespace arrow
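A brief usage sketch (an illustrative aside, not part of this patch): the refactored builders above are driven through AppendChild and Append. The child builder types, field names, and values below are assumptions chosen for illustration; the calls themselves follow the doc comments in the header.

// Sketch: building a dense union with an inferred type, following the
// AppendChild/Append contract documented above.
Status BuildDenseUnionExample(MemoryPool* pool, std::shared_ptr<Array>* out) {
  DenseUnionBuilder builder(pool);
  auto ints = std::make_shared<Int8Builder>(pool);
  auto strs = std::make_shared<StringBuilder>(pool);
  const int8_t ints_tag = builder.AppendChild(ints, "ints");
  const int8_t strs_tag = builder.AppendChild(strs, "strs");
  // Append(tag) records the type id and takes the child's current length as
  // the dense offset, so the matching child must be appended to right after.
  RETURN_NOT_OK(builder.Append(ints_tag));
  RETURN_NOT_OK(ints->Append(42));
  RETURN_NOT_OK(builder.Append(strs_tag));
  RETURN_NOT_OK(strs->Append("hello"));
  // With no type given to the constructor, Finish infers a dense
  // union<ints: int8, strs: string>.
  return builder.Finish(out);
}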
diff --git a/cpp/src/arrow/array/concatenate-test.cc b/cpp/src/arrow/array/concatenate-test.cc
index cf105ceb65b..730b25ab822 100644
--- a/cpp/src/arrow/array/concatenate-test.cc
+++ b/cpp/src/arrow/array/concatenate-test.cc
@@ -48,10 +48,11 @@ class ConcatenateTest : public ::testing::Test {
         sizes_({0, 1, 2, 4, 16, 31, 1234}),
         null_probabilities_({0.0, 0.1, 0.5, 0.9, 1.0}) {}

-  std::vector<int32_t> Offsets(int32_t length, int32_t slice_count) {
-    std::vector<int32_t> offsets(static_cast<size_t>(slice_count + 1));
+  template <typename offset_type>
+  std::vector<offset_type> Offsets(int32_t length, int32_t slice_count) {
+    std::vector<offset_type> offsets(static_cast<size_t>(slice_count + 1));
     std::default_random_engine gen(seed_);
-    std::uniform_int_distribution<int32_t> dist(0, length);
+    std::uniform_int_distribution<offset_type> dist(0, length);
     std::generate(offsets.begin(), offsets.end(), [&] { return dist(gen); });
     std::sort(offsets.begin(), offsets.end());
     return offsets;
@@ -85,7 +86,7 @@ class ConcatenateTest : public ::testing::Test {
   template <typename ArrayFactory>
   void Check(ArrayFactory&& factory) {
     for (auto size : this->sizes_) {
-      auto offsets = this->Offsets(size, 3);
+      auto offsets = this->Offsets<int32_t>(size, 3);
       for (auto null_probability : this->null_probabilities_) {
         std::shared_ptr<Array> array;
         factory(size, null_probability, &array);
@@ -146,16 +147,16 @@ TYPED_TEST(PrimitiveConcatenateTest, Primitives) {

 TEST_F(ConcatenateTest, StringType) {
   Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {
-    auto values_size = size * 4;
-    auto char_array = this->GeneratePrimitive<Int8Type>(values_size, null_probability);
-    std::shared_ptr<Buffer> offsets;
-    auto offsets_vector = this->Offsets(values_size, size);
-    // ensure the first offset is 0, which is expected for StringType
-    offsets_vector[0] = 0;
-    ASSERT_OK(CopyBufferFromVector(offsets_vector, default_memory_pool(), &offsets));
-    *out = MakeArray(ArrayData::Make(
-        utf8(), size,
-        {char_array->data()->buffers[0], offsets, char_array->data()->buffers[1]}));
+    *out = rng_.String(size, /*min_length =*/0, /*max_length =*/15, null_probability);
+    ASSERT_OK(ValidateArray(**out));
+  });
+}
+
+TEST_F(ConcatenateTest, LargeStringType) {
+  Check([this](int32_t
size, double null_probability, std::shared_ptr* out) { + *out = + rng_.LargeString(size, /*min_length =*/0, /*max_length =*/15, null_probability); + ASSERT_OK(ValidateArray(**out)); }); } @@ -163,7 +164,7 @@ TEST_F(ConcatenateTest, ListType) { Check([this](int32_t size, double null_probability, std::shared_ptr* out) { auto values_size = size * 4; auto values = this->GeneratePrimitive(values_size, null_probability); - auto offsets_vector = this->Offsets(values_size, size); + auto offsets_vector = this->Offsets(values_size, size); // ensure the first offset is 0, which is expected for ListType offsets_vector[0] = 0; std::shared_ptr offsets; diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index 60da0d3f856..a20b157acd5 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -184,14 +184,21 @@ class ConcatenateImpl { Status Visit(const BinaryType&) { std::vector value_ranges; - RETURN_NOT_OK(ConcatenateOffsets(Buffers(1, *offset_type), pool_, + RETURN_NOT_OK(ConcatenateOffsets(Buffers(1, sizeof(int32_t)), pool_, + &out_.buffers[1], &value_ranges)); + return ConcatenateBuffers(Buffers(2, value_ranges), pool_, &out_.buffers[2]); + } + + Status Visit(const LargeBinaryType&) { + std::vector value_ranges; + RETURN_NOT_OK(ConcatenateOffsets(Buffers(1, sizeof(int64_t)), pool_, &out_.buffers[1], &value_ranges)); return ConcatenateBuffers(Buffers(2, value_ranges), pool_, &out_.buffers[2]); } Status Visit(const ListType&) { std::vector value_ranges; - RETURN_NOT_OK(ConcatenateOffsets(Buffers(1, *offset_type), pool_, + RETURN_NOT_OK(ConcatenateOffsets(Buffers(1, sizeof(int32_t)), pool_, &out_.buffers[1], &value_ranges)); return ConcatenateImpl(ChildData(0, value_ranges), pool_) .Concatenate(out_.child_data[0].get()); @@ -277,13 +284,11 @@ class ConcatenateImpl { } // Gather the index-th buffer of each input into a vector. - // Buffers are assumed to contain elements of fixed.bit_width(), + // Buffers are assumed to contain elements of the given byte_width, // those elements are sliced with that input's offset and length. // Note that BufferVector will not contain the buffer of in_[i] if it's // nullptr. - BufferVector Buffers(size_t index, const FixedWidthType& fixed) { - DCHECK_EQ(fixed.bit_width() % 8, 0); - auto byte_width = fixed.bit_width() / 8; + BufferVector Buffers(size_t index, int byte_width) { BufferVector buffers; buffers.reserve(in_.size()); for (const ArrayData& array_data : in_) { @@ -296,6 +301,16 @@ class ConcatenateImpl { return buffers; } + // Gather the index-th buffer of each input into a vector. + // Buffers are assumed to contain elements of fixed.bit_width(), + // those elements are sliced with that input's offset and length. + // Note that BufferVector will not contain the buffer of in_[i] if it's + // nullptr. + BufferVector Buffers(size_t index, const FixedWidthType& fixed) { + DCHECK_EQ(fixed.bit_width() % 8, 0); + return Buffers(index, fixed.bit_width() / 8); + } + // Gather the index-th buffer of each input as a Bitmap // into a vector of Bitmaps. 
std::vector Bitmaps(size_t index) { @@ -328,15 +343,11 @@ class ConcatenateImpl { return child_data; } - static const std::shared_ptr offset_type; const std::vector& in_; MemoryPool* pool_; ArrayData out_; }; -const std::shared_ptr ConcatenateImpl::offset_type = - std::static_pointer_cast(int32()); - Status Concatenate(const ArrayVector& arrays, MemoryPool* pool, std::shared_ptr* out) { if (arrays.size() == 0) { diff --git a/cpp/src/arrow/buffer-builder.h b/cpp/src/arrow/buffer-builder.h index 85f36ee3f5a..797e50b78e7 100644 --- a/cpp/src/arrow/buffer-builder.h +++ b/cpp/src/arrow/buffer-builder.h @@ -220,10 +220,8 @@ class TypedBufferBuilder::value void UnsafeAppend(const int64_t num_copies, T value) { auto data = mutable_data() + length(); - bytes_builder_.UnsafeAppend(num_copies * sizeof(T), 0); - for (const auto end = data + num_copies; data != end; ++data) { - *data = value; - } + bytes_builder_.UnsafeAdvance(num_copies * sizeof(T)); + std::fill(data, data + num_copies, value); } Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index f6f80425f35..44b0d041be9 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -107,6 +107,8 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, BUILDER_CASE(DOUBLE, DoubleBuilder); BUILDER_CASE(STRING, StringBuilder); BUILDER_CASE(BINARY, BinaryBuilder); + BUILDER_CASE(LARGE_STRING, LargeStringBuilder); + BUILDER_CASE(LARGE_BINARY, LargeBinaryBuilder); BUILDER_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryBuilder); BUILDER_CASE(DECIMAL, Decimal128Builder); case Type::DICTIONARY: { @@ -155,14 +157,32 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, case Type::STRUCT: { const std::vector>& fields = type->children(); - std::vector> values_builder; + std::vector> field_builders; for (auto it : fields) { std::unique_ptr builder; RETURN_NOT_OK(MakeBuilder(pool, it->type(), &builder)); - values_builder.emplace_back(std::move(builder)); + field_builders.emplace_back(std::move(builder)); + } + out->reset(new StructBuilder(type, pool, std::move(field_builders))); + return Status::OK(); + } + + case Type::UNION: { + const auto& union_type = internal::checked_cast(*type); + const std::vector>& fields = type->children(); + std::vector> field_builders; + + for (auto it : fields) { + std::unique_ptr builder; + RETURN_NOT_OK(MakeBuilder(pool, it->type(), &builder)); + field_builders.emplace_back(std::move(builder)); + } + if (union_type.mode() == UnionMode::DENSE) { + out->reset(new DenseUnionBuilder(pool, std::move(field_builders), type)); + } else { + out->reset(new SparseUnionBuilder(pool, std::move(field_builders), type)); } - out->reset(new StructBuilder(type, pool, std::move(values_builder))); return Status::OK(); } diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 56c3e2b3716..223ed5819f1 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -27,6 +27,7 @@ #include "arrow/array/builder_nested.h" // IWYU pragma: export #include "arrow/array/builder_primitive.h" // IWYU pragma: export #include "arrow/array/builder_time.h" // IWYU pragma: export +#include "arrow/array/builder_union.h" // IWYU pragma: export #include "arrow/status.h" #include "arrow/util/visibility.h" diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc deleted file mode 100644 index bb2c63179ab..00000000000 --- a/cpp/src/arrow/column-benchmark.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache 
Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "benchmark/benchmark.h" - -#include "arrow/array.h" -#include "arrow/memory_pool.h" -#include "arrow/table.h" -#include "arrow/testing/gtest_util.h" - -namespace arrow { -namespace { -template -Status MakePrimitive(int64_t length, int64_t null_count, std::shared_ptr* out) { - std::shared_ptr data, null_bitmap; - - RETURN_NOT_OK(AllocateBuffer(length * sizeof(typename ArrayType::value_type), &data)); - RETURN_NOT_OK(AllocateBuffer(BitUtil::BytesForBits(length), &null_bitmap)); - - *out = std::make_shared(length, data, null_bitmap, null_count); - return Status::OK(); -} -} // anonymous namespace - -static void BuildInt32ColumnByChunk( - benchmark::State& state) { // NOLINT non-const reference - ArrayVector arrays; - for (int chunk_n = 0; chunk_n < state.range(0); ++chunk_n) { - std::shared_ptr array; - ABORT_NOT_OK(MakePrimitive(100, 10, &array)); - arrays.push_back(array); - } - const auto INT32 = std::make_shared(); - const auto field = std::make_shared("c0", INT32); - std::unique_ptr column; - while (state.KeepRunning()) { - column.reset(new Column(field, arrays)); - } -} - -BENCHMARK(BuildInt32ColumnByChunk)->Range(5, 50000); - -} // namespace arrow diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 12991b94aeb..590ab6e4a1e 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -144,8 +144,9 @@ class RangeEqualsVisitor { return Status::OK(); } - bool CompareBinaryRange(const BinaryArray& left) const { - const auto& right = checked_cast(right_); + template + bool CompareBinaryRange(const ArrayType& left) const { + const auto& right = checked_cast(right_); for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_; ++i, ++o_i) { @@ -154,10 +155,10 @@ class RangeEqualsVisitor { return false; } if (is_null) continue; - const int32_t begin_offset = left.value_offset(i); - const int32_t end_offset = left.value_offset(i + 1); - const int32_t right_begin_offset = right.value_offset(o_i); - const int32_t right_end_offset = right.value_offset(o_i + 1); + const auto begin_offset = left.value_offset(i); + const auto end_offset = left.value_offset(i + 1); + const auto right_begin_offset = right.value_offset(o_i); + const auto right_end_offset = right.value_offset(o_i + 1); // Underlying can't be equal if the size isn't equal if (end_offset - begin_offset != right_end_offset - right_begin_offset) { return false; @@ -234,26 +235,15 @@ class RangeEqualsVisitor { const auto& left_type = checked_cast(*left.type()); // Define a mapping from the type id to child number - uint8_t max_code = 0; - const std::vector& type_codes = left_type.type_codes(); - for (size_t i = 0; i < type_codes.size(); ++i) { - const uint8_t code = type_codes[i]; - if (code > max_code) { - max_code = 
code; - } - } - - // Store mapping in a vector for constant time lookups - std::vector type_id_to_child_num(max_code + 1); - for (uint8_t i = 0; i < static_cast(type_codes.size()); ++i) { + std::vector type_id_to_child_num(left.union_type()->max_type_code() + 1, 0); + for (uint8_t i = 0; i < type_codes.size(); ++i) { type_id_to_child_num[type_codes[i]] = i; } const uint8_t* left_ids = left.raw_type_ids(); const uint8_t* right_ids = right.raw_type_ids(); - uint8_t id, child_num; for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_; ++i, ++o_i) { if (left.IsNull(i) != right.IsNull(o_i)) { @@ -264,8 +254,7 @@ class RangeEqualsVisitor { return false; } - id = left_ids[i]; - child_num = type_id_to_child_num[id]; + auto child_num = type_id_to_child_num[left_ids[i]]; // TODO(wesm): really we should be comparing stretches of non-null data // rather than looking at one value at a time. @@ -290,6 +279,11 @@ class RangeEqualsVisitor { return Status::OK(); } + Status Visit(const LargeBinaryArray& left) { + result_ = CompareBinaryRange(left); + return Status::OK(); + } + Status Visit(const FixedSizeBinaryArray& left) { const auto& right = checked_cast(right_); @@ -501,18 +495,21 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor { template bool ValueOffsetsEqual(const ArrayType& left) { + using offset_type = typename ArrayType::offset_type; + const auto& right = checked_cast(right_); if (left.offset() == 0 && right.offset() == 0) { return left.value_offsets()->Equals(*right.value_offsets(), - (left.length() + 1) * sizeof(int32_t)); + (left.length() + 1) * sizeof(offset_type)); } else { // One of the arrays is sliced; logic is more complicated because the // value offsets are not both 0-based auto left_offsets = - reinterpret_cast(left.value_offsets()->data()) + left.offset(); + reinterpret_cast(left.value_offsets()->data()) + + left.offset(); auto right_offsets = - reinterpret_cast(right.value_offsets()->data()) + + reinterpret_cast(right.value_offsets()->data()) + right.offset(); for (int64_t i = 0; i < left.length() + 1; ++i) { @@ -524,10 +521,11 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor { } } - bool CompareBinary(const BinaryArray& left) { - const auto& right = checked_cast(right_); + template + bool CompareBinary(const ArrayType& left) { + const auto& right = checked_cast(right_); - bool equal_offsets = ValueOffsetsEqual(left); + bool equal_offsets = ValueOffsetsEqual(left); if (!equal_offsets) { return false; } @@ -556,8 +554,8 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor { } } else { // ARROW-537: Only compare data in non-null slots - const int32_t* left_offsets = left.raw_value_offsets(); - const int32_t* right_offsets = right.raw_value_offsets(); + auto left_offsets = left.raw_value_offsets(); + auto right_offsets = right.raw_value_offsets(); for (int64_t i = 0; i < left.length(); ++i) { if (left.IsNull(i)) { continue; @@ -576,6 +574,11 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor { return Status::OK(); } + Status Visit(const LargeBinaryArray& left) { + result_ = CompareBinary(left); + return Status::OK(); + } + Status Visit(const ListArray& left) { const auto& right = checked_cast(right_); bool equal_offsets = ValueOffsetsEqual(left); @@ -773,30 +776,16 @@ class TypeEqualsVisitor { Status Visit(const UnionType& left) { const auto& right = checked_cast(right_); - if (left.mode() != right.mode() || - left.type_codes().size() != right.type_codes().size()) { + if (left.mode() != right.mode() || left.type_codes() != right.type_codes()) 
{ result_ = false; return Status::OK(); } - const std::vector& left_codes = left.type_codes(); - const std::vector& right_codes = right.type_codes(); - - for (size_t i = 0; i < left_codes.size(); ++i) { - if (left_codes[i] != right_codes[i]) { - result_ = false; - return Status::OK(); - } - } - - for (int i = 0; i < left.num_children(); ++i) { - if (!left.child(i)->Equals(right_.child(i), check_metadata_)) { - result_ = false; - return Status::OK(); - } - } - - result_ = true; + result_ = std::equal( + left.children().begin(), left.children().end(), right.children().begin(), + [this](const std::shared_ptr& l, const std::shared_ptr& r) { + return l->Equals(r, check_metadata_); + }); return Status::OK(); } @@ -848,6 +837,15 @@ class ScalarEqualsVisitor { return Status::OK(); } + template + typename std::enable_if::value, Status>::type + Visit(const T& left_) { + const auto& left = checked_cast(left_); + const auto& right = checked_cast(right_); + result_ = internal::SharedPtrEquals(left.value, right.value); + return Status::OK(); + } + Status Visit(const Decimal128Scalar& left) { const auto& right = checked_cast(right_); result_ = left.value == right.value; @@ -970,7 +968,13 @@ bool TensorEquals(const Tensor& left, const Tensor& right) { } else if (left.size() == 0) { are_equal = true; } else { - if (!left.is_contiguous() || !right.is_contiguous()) { + const bool left_row_major_p = left.is_row_major(); + const bool left_column_major_p = left.is_column_major(); + const bool right_row_major_p = right.is_row_major(); + const bool right_column_major_p = right.is_column_major(); + + if (!(left_row_major_p && right_row_major_p) && + !(left_column_major_p && right_column_major_p)) { const auto& shape = left.shape(); if (shape != right.shape()) { are_equal = false; @@ -1026,9 +1030,8 @@ struct SparseTensorEqualsImpl { const uint8_t* left_data = left.data()->data(); const uint8_t* right_data = right.data()->data(); - return memcmp(left_data, right_data, - static_cast(byte_width * left.non_zero_length())); + static_cast(byte_width * left.non_zero_length())) == 0; } }; diff --git a/cpp/src/arrow/compute/context.h b/cpp/src/arrow/compute/context.h index 8ac4700b91f..fc99a4f8108 100644 --- a/cpp/src/arrow/compute/context.h +++ b/cpp/src/arrow/compute/context.h @@ -36,7 +36,7 @@ class CpuInfo; namespace compute { -#define RETURN_IF_ERROR(ctx) \ +#define ARROW_RETURN_IF_ERROR(ctx) \ if (ARROW_PREDICT_FALSE(ctx->HasError())) { \ Status s = ctx->status(); \ ctx->ResetStatus(); \ diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc index 510b9a48c73..80538f20e4f 100644 --- a/cpp/src/arrow/compute/kernels/cast-test.cc +++ b/cpp/src/arrow/compute/kernels/cast-test.cc @@ -52,6 +52,8 @@ namespace compute { using internal::checked_cast; +static constexpr const char* kInvalidUtf8 = "\xa0\xa1"; + static std::vector> kNumericTypes = { uint8(), int8(), uint16(), int16(), uint32(), int32(), uint64(), int64(), float32(), float64()}; @@ -131,6 +133,132 @@ class TestCast : public ComputeFixture, public TestBase { CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options); } } + + template + void TestCastBinaryToString() { + CastOptions options; + auto src_type = TypeTraits::type_singleton(); + auto dest_type = TypeTraits::type_singleton(); + + // All valid except the last one + std::vector all = {1, 1, 1, 1, 1}; + std::vector valid = {1, 1, 1, 1, 0}; + std::vector strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8}; + + std::shared_ptr array; + + // Should 
accept when invalid but null. + ArrayFromVector(src_type, valid, strings, &array); + CheckZeroCopy(*array, dest_type); + + // Should refuse due to invalid utf8 payload + CheckFails(src_type, strings, all, dest_type, options); + + // Should accept due to option override + options.allow_invalid_utf8 = true; + CheckCase( + src_type, strings, all, dest_type, strings, options); + } + + template + void TestCastStringToNumber() { + CastOptions options; + auto src_type = TypeTraits::type_singleton(); + + std::vector is_valid = {true, false, true, true, true}; + + // string to int + std::vector v_int = {"0", "1", "127", "-1", "0"}; + std::vector e_int8 = {0, 1, 127, -1, 0}; + std::vector e_int16 = {0, 1, 127, -1, 0}; + std::vector e_int32 = {0, 1, 127, -1, 0}; + std::vector e_int64 = {0, 1, 127, -1, 0}; + CheckCase(src_type, v_int, is_valid, + int8(), e_int8, options); + CheckCase(src_type, v_int, is_valid, + int16(), e_int16, options); + CheckCase(src_type, v_int, is_valid, + int32(), e_int32, options); + CheckCase(src_type, v_int, is_valid, + int64(), e_int64, options); + + v_int = {"2147483647", "0", "-2147483648", "0", "0"}; + e_int32 = {2147483647, 0, -2147483648LL, 0, 0}; + CheckCase(src_type, v_int, is_valid, + int32(), e_int32, options); + v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"}; + e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0}; + CheckCase(src_type, v_int, is_valid, + int64(), e_int64, options); + + // string to uint + std::vector v_uint = {"0", "1", "127", "255", "0"}; + std::vector e_uint8 = {0, 1, 127, 255, 0}; + std::vector e_uint16 = {0, 1, 127, 255, 0}; + std::vector e_uint32 = {0, 1, 127, 255, 0}; + std::vector e_uint64 = {0, 1, 127, 255, 0}; + CheckCase(src_type, v_uint, is_valid, + uint8(), e_uint8, options); + CheckCase(src_type, v_uint, is_valid, + uint16(), e_uint16, options); + CheckCase(src_type, v_uint, is_valid, + uint32(), e_uint32, options); + CheckCase(src_type, v_uint, is_valid, + uint64(), e_uint64, options); + + v_uint = {"4294967295", "0", "0", "0", "0"}; + e_uint32 = {4294967295, 0, 0, 0, 0}; + CheckCase(src_type, v_uint, is_valid, + uint32(), e_uint32, options); + v_uint = {"18446744073709551615", "0", "0", "0", "0"}; + e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0}; + CheckCase(src_type, v_uint, is_valid, + uint64(), e_uint64, options); + + // string to float + std::vector v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"}; + std::vector e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f}; + std::vector e_double = {0.1, 1.2, 127.3, 200.4, 0.5}; + CheckCase(src_type, v_float, is_valid, + float32(), e_float, options); + CheckCase(src_type, v_float, is_valid, + float64(), e_double, options); + + // Test that casting is locale-independent + auto global_locale = std::locale(); + try { + // French locale uses the comma as decimal point + std::locale::global(std::locale("fr_FR.UTF-8")); + } catch (std::runtime_error&) { + // Locale unavailable, ignore + } + CheckCase(src_type, v_float, is_valid, + float32(), e_float, options); + CheckCase(src_type, v_float, is_valid, + float64(), e_double, options); + std::locale::global(global_locale); + } + + template + void TestCastStringToTimestamp() { + CastOptions options; + auto src_type = TypeTraits::type_singleton(); + + std::vector is_valid = {true, false, true}; + std::vector strings = {"1970-01-01", "xxx", "2000-02-29"}; + + auto type = timestamp(TimeUnit::SECOND); + std::vector e = {0, 0, 951782400}; + CheckCase( + src_type, strings, is_valid, type, e, options); + + type = 
timestamp(TimeUnit::MICRO); + e = {0, 0, 951782400000000LL}; + CheckCase( + src_type, strings, is_valid, type, e, options); + + // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc + } }; TEST_F(TestCast, SameTypeZeroCopy) { @@ -263,7 +391,7 @@ std::vector UnsafeVectorCast(const std::vector& v) { for (size_t i = 0; i < v.size(); i++) result[i] = static_cast(v[i]); - return std::move(result); + return result; } TEST_F(TestCast, IntegerSignedToUnsigned) { @@ -922,6 +1050,10 @@ TEST_F(TestCast, StringToBoolean) { e, options); CheckCase(utf8(), v2, is_valid, boolean(), e, options); + + // Same with LargeStringType + CheckCase(large_utf8(), v1, is_valid, + boolean(), e, options); } TEST_F(TestCast, StringToBooleanErrors) { @@ -931,84 +1063,13 @@ TEST_F(TestCast, StringToBooleanErrors) { CheckFails(utf8(), {"false "}, is_valid, boolean(), options); CheckFails(utf8(), {"T"}, is_valid, boolean(), options); + CheckFails(large_utf8(), {"T"}, is_valid, boolean(), + options); } -TEST_F(TestCast, StringToNumber) { - CastOptions options; +TEST_F(TestCast, StringToNumber) { TestCastStringToNumber(); } - std::vector is_valid = {true, false, true, true, true}; - - // string to int - std::vector v_int = {"0", "1", "127", "-1", "0"}; - std::vector e_int8 = {0, 1, 127, -1, 0}; - std::vector e_int16 = {0, 1, 127, -1, 0}; - std::vector e_int32 = {0, 1, 127, -1, 0}; - std::vector e_int64 = {0, 1, 127, -1, 0}; - CheckCase(utf8(), v_int, is_valid, int8(), - e_int8, options); - CheckCase(utf8(), v_int, is_valid, int16(), - e_int16, options); - CheckCase(utf8(), v_int, is_valid, int32(), - e_int32, options); - CheckCase(utf8(), v_int, is_valid, int64(), - e_int64, options); - - v_int = {"2147483647", "0", "-2147483648", "0", "0"}; - e_int32 = {2147483647, 0, -2147483648LL, 0, 0}; - CheckCase(utf8(), v_int, is_valid, int32(), - e_int32, options); - v_int = {"9223372036854775807", "0", "-9223372036854775808", "0", "0"}; - e_int64 = {9223372036854775807LL, 0, (-9223372036854775807LL - 1), 0, 0}; - CheckCase(utf8(), v_int, is_valid, int64(), - e_int64, options); - - // string to uint - std::vector v_uint = {"0", "1", "127", "255", "0"}; - std::vector e_uint8 = {0, 1, 127, 255, 0}; - std::vector e_uint16 = {0, 1, 127, 255, 0}; - std::vector e_uint32 = {0, 1, 127, 255, 0}; - std::vector e_uint64 = {0, 1, 127, 255, 0}; - CheckCase(utf8(), v_uint, is_valid, - uint8(), e_uint8, options); - CheckCase(utf8(), v_uint, is_valid, - uint16(), e_uint16, options); - CheckCase(utf8(), v_uint, is_valid, - uint32(), e_uint32, options); - CheckCase(utf8(), v_uint, is_valid, - uint64(), e_uint64, options); - - v_uint = {"4294967295", "0", "0", "0", "0"}; - e_uint32 = {4294967295, 0, 0, 0, 0}; - CheckCase(utf8(), v_uint, is_valid, - uint32(), e_uint32, options); - v_uint = {"18446744073709551615", "0", "0", "0", "0"}; - e_uint64 = {18446744073709551615ULL, 0, 0, 0, 0}; - CheckCase(utf8(), v_uint, is_valid, - uint64(), e_uint64, options); - - // string to float - std::vector v_float = {"0.1", "1.2", "127.3", "200.4", "0.5"}; - std::vector e_float = {0.1f, 1.2f, 127.3f, 200.4f, 0.5f}; - std::vector e_double = {0.1, 1.2, 127.3, 200.4, 0.5}; - CheckCase(utf8(), v_float, is_valid, - float32(), e_float, options); - CheckCase(utf8(), v_float, is_valid, - float64(), e_double, options); - - // Test that casting is locale-independent - auto global_locale = std::locale(); - try { - // French locale uses the comma as decimal point - std::locale::global(std::locale("fr_FR.UTF-8")); - } catch (std::runtime_error&) { - // Locale 
unavailable, ignore - } - CheckCase(utf8(), v_float, is_valid, - float32(), e_float, options); - CheckCase(utf8(), v_float, is_valid, - float64(), e_double, options); - std::locale::global(global_locale); -} +TEST_F(TestCast, LargeStringToNumber) { TestCastStringToNumber(); } TEST_F(TestCast, StringToNumberErrors) { CastOptions options; @@ -1027,24 +1088,9 @@ TEST_F(TestCast, StringToNumberErrors) { CheckFails(utf8(), {"z"}, is_valid, float32(), options); } -TEST_F(TestCast, StringToTimestamp) { - CastOptions options; - - std::vector is_valid = {true, false, true}; - std::vector strings = {"1970-01-01", "xxx", "2000-02-29"}; +TEST_F(TestCast, StringToTimestamp) { TestCastStringToTimestamp(); } - auto type = timestamp(TimeUnit::SECOND); - std::vector e = {0, 0, 951782400}; - CheckCase(utf8(), strings, is_valid, - type, e, options); - - type = timestamp(TimeUnit::MICRO); - e = {0, 0, 951782400000000LL}; - CheckCase(utf8(), strings, is_valid, - type, e, options); - - // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc -} +TEST_F(TestCast, LargeStringToTimestamp) { TestCastStringToTimestamp(); } TEST_F(TestCast, StringToTimestampErrors) { CastOptions options; @@ -1058,29 +1104,10 @@ TEST_F(TestCast, StringToTimestampErrors) { } } -constexpr const char* kInvalidUtf8 = "\xa0\xa1"; - -TEST_F(TestCast, BinaryToString) { - CastOptions options; - - // All valid except the last one - std::vector all = {1, 1, 1, 1, 1}; - std::vector valid = {1, 1, 1, 1, 0}; - std::vector strings = {"Hi", "olá mundo", "你好世界", "", kInvalidUtf8}; - - std::shared_ptr array; - - // Should accept when invalid but null. - ArrayFromVector(binary(), valid, strings, &array); - CheckZeroCopy(*array, utf8()); - - // Should refuse due to invalid utf8 payload - CheckFails(binary(), strings, all, utf8(), options); +TEST_F(TestCast, BinaryToString) { TestCastBinaryToString(); } - // Should accept due to option override - options.allow_invalid_utf8 = true; - CheckCase(binary(), strings, all, - utf8(), strings, options); +TEST_F(TestCast, LargeBinaryToLargeString) { + TestCastBinaryToString(); } TEST_F(TestCast, ListToList) { diff --git a/cpp/src/arrow/compute/kernels/cast.cc b/cpp/src/arrow/compute/kernels/cast.cc index 299ca80402c..a8b66159997 100644 --- a/cpp/src/arrow/compute/kernels/cast.cc +++ b/cpp/src/arrow/compute/kernels/cast.cc @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -46,26 +45,25 @@ #ifdef ARROW_EXTRA_ERROR_CONTEXT -#define FUNC_RETURN_NOT_OK(s) \ - do { \ - Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { \ - std::stringstream ss; \ - ss << __FILE__ << ":" << __LINE__ << " code: " << #s << "\n" << _s.message(); \ - ctx->SetStatus(Status(_s.code(), ss.str())); \ - return; \ - } \ +#define FUNC_RETURN_NOT_OK(expr) \ + do { \ + Status _st = (expr); \ + if (ARROW_PREDICT_FALSE(!_st.ok())) { \ + _st.AddContextLine(__FILE__, __LINE__, #expr); \ + ctx->SetStatus(_st); \ + return; \ + } \ } while (0) #else -#define FUNC_RETURN_NOT_OK(s) \ - do { \ - Status _s = (s); \ - if (ARROW_PREDICT_FALSE(!_s.ok())) { \ - ctx->SetStatus(_s); \ - return; \ - } \ +#define FUNC_RETURN_NOT_OK(expr) \ + do { \ + Status _st = (expr); \ + if (ARROW_PREDICT_FALSE(!_st.ok())) { \ + ctx->SetStatus(_st); \ + return; \ + } \ } while (0) #endif // ARROW_EXTRA_ERROR_CONTEXT @@ -642,7 +640,7 @@ Status InvokeWithAllocation(FunctionContext* ctx, UnaryKernel* func, const Datum } else { RETURN_NOT_OK(detail::InvokeUnaryArrayKernel(ctx, func, input, &result)); } - 
RETURN_IF_ERROR(ctx); + ARROW_RETURN_IF_ERROR(ctx); *out = detail::WrapDatumsLike(input, result); return Status::OK(); } @@ -907,13 +905,15 @@ struct CastFunctor { // ---------------------------------------------------------------------- // String to Number -template -struct CastFunctor> { +template +struct CastFunctor::value && + is_number_type::value>::type> { void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { using out_type = typename O::c_type; - StringArray input_array(input.Copy()); + typename TypeTraits::ArrayType input_array(input.Copy()); auto out_data = output->GetMutableValues(1); internal::StringConverter converter; @@ -935,15 +935,15 @@ struct CastFunctor> { // ---------------------------------------------------------------------- // String to Boolean -template -struct CastFunctor::value>::type> { +template +struct CastFunctor::value>::type> { void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { - StringArray input_array(input.Copy()); + typename TypeTraits::ArrayType input_array(input.Copy()); internal::FirstTimeBitmapWriter writer(output->buffers[1]->mutable_data(), output->offset, input.length); - internal::StringConverter converter; + internal::StringConverter converter; for (int64_t i = 0; i < input.length; ++i) { if (input_array.IsNull(i)) { @@ -974,13 +974,14 @@ struct CastFunctor -struct CastFunctor { +template +struct CastFunctor::value>::type> { void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { using out_type = TimestampType::c_type; - StringArray input_array(input.Copy()); + typename TypeTraits::ArrayType input_array(input.Copy()); auto out_data = output->GetMutableValues(1); internal::StringConverter converter(output->type); @@ -1003,47 +1004,51 @@ struct CastFunctor { // Binary to String // -template -struct CastFunctor::value>::type> { +#if defined(_MSC_VER) +// Silence warning: """'visitor': unreferenced local variable""" +#pragma warning(push) +#pragma warning(disable : 4101) +#endif + +template +struct BinaryToStringSameWidthCastFunctor { void operator()(FunctionContext* ctx, const CastOptions& options, const ArrayData& input, ArrayData* output) { - BinaryArray binary(input.Copy()); + if (!options.allow_invalid_utf8) { + util::InitializeUTF8(); - if (options.allow_invalid_utf8) { - ZeroCopyData(input, output); - return; + ArrayDataVisitor visitor; + Status st = visitor.Visit(input, this); + if (!st.ok()) { + ctx->SetStatus(st); + return; + } } + ZeroCopyData(input, output); + } - util::InitializeUTF8(); - - if (binary.null_count() != 0) { - for (int64_t i = 0; i < input.length; i++) { - if (binary.IsNull(i)) { - continue; - } - - const auto str = binary.GetView(i); - if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) { - ctx->SetStatus(Status::Invalid("Invalid UTF8 payload")); - return; - } - } + Status VisitNull() { return Status::OK(); } - } else { - for (int64_t i = 0; i < input.length; i++) { - const auto str = binary.GetView(i); - if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) { - ctx->SetStatus(Status::Invalid("Invalid UTF8 payload")); - return; - } - } + Status VisitValue(util::string_view str) { + if (ARROW_PREDICT_FALSE(!arrow::util::ValidateUTF8(str))) { + return Status::Invalid("Invalid UTF8 payload"); } - - ZeroCopyData(input, output); + return Status::OK(); } }; +template <> +struct CastFunctor + : public BinaryToStringSameWidthCastFunctor {}; + 
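As a caller-level sketch (an illustrative aside, not part of the patch): the two specializations on either side of this point are both reached through the generic compute cast entry point, and the option that the shared functor checks can be set like so. The variable `binary_array` is an assumption for illustration.

// Sketch: binary -> string cast through the compute layer. With
// allow_invalid_utf8 set, the functor skips UTF-8 validation and the
// result shares the input buffers via ZeroCopyData.
compute::FunctionContext ctx(default_memory_pool());
compute::CastOptions options;
options.allow_invalid_utf8 = true;
std::shared_ptr<Array> as_string;
RETURN_NOT_OK(compute::Cast(&ctx, *binary_array, utf8(), options, &as_string));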
+template <> +struct CastFunctor + : public BinaryToStringSameWidthCastFunctor {}; + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + // ---------------------------------------------------------------------- typedef std::function out_ty CAST_FUNCTION_CASE(TimestampType); CAST_FUNCTION_CASE(BinaryType); CAST_FUNCTION_CASE(StringType); + CAST_FUNCTION_CASE(LargeBinaryType); + CAST_FUNCTION_CASE(LargeStringType); CAST_FUNCTION_CASE(DictionaryType); case Type::LIST: RETURN_NOT_OK(GetListCastFunc(in_type, std::move(out_type), options, kernel)); diff --git a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h index 77334af36b5..fb82067bb02 100644 --- a/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h +++ b/cpp/src/arrow/compute/kernels/generated/cast-codegen-internal.h @@ -171,6 +171,9 @@ #define BINARY_CASES(TEMPLATE) \ TEMPLATE(BinaryType, StringType) +#define LARGEBINARY_CASES(TEMPLATE) \ + TEMPLATE(LargeBinaryType, LargeStringType) + #define STRING_CASES(TEMPLATE) \ TEMPLATE(StringType, BooleanType) \ TEMPLATE(StringType, UInt8Type) \ @@ -185,6 +188,20 @@ TEMPLATE(StringType, DoubleType) \ TEMPLATE(StringType, TimestampType) +#define LARGESTRING_CASES(TEMPLATE) \ + TEMPLATE(LargeStringType, BooleanType) \ + TEMPLATE(LargeStringType, UInt8Type) \ + TEMPLATE(LargeStringType, Int8Type) \ + TEMPLATE(LargeStringType, UInt16Type) \ + TEMPLATE(LargeStringType, Int16Type) \ + TEMPLATE(LargeStringType, UInt32Type) \ + TEMPLATE(LargeStringType, Int32Type) \ + TEMPLATE(LargeStringType, UInt64Type) \ + TEMPLATE(LargeStringType, Int64Type) \ + TEMPLATE(LargeStringType, FloatType) \ + TEMPLATE(LargeStringType, DoubleType) \ + TEMPLATE(LargeStringType, TimestampType) + #define DICTIONARY_CASES(TEMPLATE) \ TEMPLATE(DictionaryType, UInt8Type) \ TEMPLATE(DictionaryType, Int8Type) \ diff --git a/cpp/src/arrow/compute/kernels/generated/codegen.py b/cpp/src/arrow/compute/kernels/generated/codegen.py index 04fc38618bd..c9db7eaa0dc 100644 --- a/cpp/src/arrow/compute/kernels/generated/codegen.py +++ b/cpp/src/arrow/compute/kernels/generated/codegen.py @@ -85,7 +85,9 @@ def generate(self): CastCodeGenerator('Timestamp', ['Date32', 'Date64', 'Timestamp'], parametric=True), CastCodeGenerator('Binary', ['String']), + CastCodeGenerator('LargeBinary', ['LargeString']), CastCodeGenerator('String', NUMERIC_TYPES + ['Timestamp']), + CastCodeGenerator('LargeString', NUMERIC_TYPES + ['Timestamp']), CastCodeGenerator('Dictionary', INTEGER_TYPES + FLOATING_TYPES + DATE_TIME_TYPES + ['Null', 'Binary', 'FixedSizeBinary', 'String', diff --git a/cpp/src/arrow/compute/kernels/take-benchmark.cc b/cpp/src/arrow/compute/kernels/take-benchmark.cc index 139e183b92f..d28f7af2abb 100644 --- a/cpp/src/arrow/compute/kernels/take-benchmark.cc +++ b/cpp/src/arrow/compute/kernels/take-benchmark.cc @@ -48,7 +48,8 @@ static void TakeInt64(benchmark::State& state) { auto values = rand.Int64(array_size, -100, 100, args.null_proportion); - auto indices = rand.Int32(array_size, 0, array_size - 1, args.null_proportion); + auto indices = rand.Int32(static_cast(array_size), 0, + static_cast(array_size - 1), args.null_proportion); TakeBenchmark(state, values, indices); } @@ -64,7 +65,8 @@ static void TakeFixedSizeList1Int64(benchmark::State& state) { fixed_size_list(int64(), 1), array_size, int_array, int_array->null_bitmap(), int_array->null_count()); - auto indices = rand.Int32(array_size, 0, array_size - 1, args.null_proportion); + auto indices = 
rand.Int32(static_cast(array_size), 0, + static_cast(array_size - 1), args.null_proportion); TakeBenchmark(state, values, indices); } @@ -110,7 +112,8 @@ static void TakeString(benchmark::State& state) { auto values = std::static_pointer_cast(rand.String( array_size, string_min_length, string_max_length, args.null_proportion)); - auto indices = rand.Int32(array_size, 0, array_size - 1, args.null_proportion); + auto indices = rand.Int32(static_cast(array_size), 0, + static_cast(array_size - 1), args.null_proportion); TakeBenchmark(state, values, indices); } diff --git a/cpp/src/arrow/csv/column-builder.cc b/cpp/src/arrow/csv/column-builder.cc index 657aa6f4e96..cfc36fe6109 100644 --- a/cpp/src/arrow/csv/column-builder.cc +++ b/cpp/src/arrow/csv/column-builder.cc @@ -75,8 +75,8 @@ class TypedColumnBuilder : public ColumnBuilder { return st; } else { std::stringstream ss; - ss << "In column #" << col_index_ << ": " << st.message(); - return Status(st.code(), ss.str()); + ss << "In CSV column #" << col_index_ << ": " << st.message(); + return st.WithMessage(ss.str()); } } diff --git a/cpp/src/arrow/csv/converter-test.cc b/cpp/src/arrow/csv/converter-test.cc index a5e4c0372c4..53176ff0a1a 100644 --- a/cpp/src/arrow/csv/converter-test.cc +++ b/cpp/src/arrow/csv/converter-test.cc @@ -30,6 +30,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" @@ -118,11 +119,17 @@ void AssertConversionError(const std::shared_ptr& type, ////////////////////////////////////////////////////////////////////////// // Test functions begin here -TEST(BinaryConversion, Basics) { - AssertConversion(binary(), {"ab,cdé\n", ",\xffgh\n"}, - {{"ab", ""}, {"cdé", "\xffgh"}}); +template +static void TestBinaryConversionBasics() { + auto type = TypeTraits::type_singleton(); + AssertConversion(type, {"ab,cdé\n", ",\xffgh\n"}, + {{"ab", ""}, {"cdé", "\xffgh"}}); } +TEST(BinaryConversion, Basics) { TestBinaryConversionBasics(); } + +TEST(LargeBinaryConversion, Basics) { TestBinaryConversionBasics(); } + TEST(BinaryConversion, Nulls) { AssertConversion(binary(), {"ab,N/A\n", "NULL,\n"}, {{"ab", "NULL"}, {"N/A", ""}}, @@ -135,16 +142,22 @@ TEST(BinaryConversion, Nulls) { {{true, false}, {false, false}}, options); } -TEST(StringConversion, Basics) { - AssertConversion(utf8(), {"ab,cdé\n", ",gh\n"}, - {{"ab", ""}, {"cdé", "gh"}}); +template +static void TestStringConversionBasics() { + auto type = TypeTraits::type_singleton(); + AssertConversion(type, {"ab,cdé\n", ",gh\n"}, + {{"ab", ""}, {"cdé", "gh"}}); auto options = ConvertOptions::Defaults(); options.check_utf8 = false; - AssertConversion(utf8(), {"ab,cdé\n", ",\xffgh\n"}, - {{"ab", ""}, {"cdé", "\xffgh"}}, options); + AssertConversion(type, {"ab,cdé\n", ",\xffgh\n"}, + {{"ab", ""}, {"cdé", "\xffgh"}}, options); } +TEST(StringConversion, Basics) { TestStringConversionBasics(); } + +TEST(LargeStringConversion, Basics) { TestStringConversionBasics(); } + TEST(StringConversion, Nulls) { AssertConversion(utf8(), {"ab,N/A\n", "NULL,\n"}, {{"ab", "NULL"}, {"N/A", ""}}, @@ -157,11 +170,17 @@ TEST(StringConversion, Nulls) { {{true, false}, {false, false}}, options); } -TEST(StringConversion, Errors) { +template +static void TestStringConversionErrors() { + auto type = TypeTraits::type_singleton(); // Invalid UTF8 in column 0 - AssertConversionError(utf8(), {"ab,cdé\n", "\xff,gh\n"}, {0}); + AssertConversionError(type, {"ab,cdé\n", "\xff,gh\n"}, {0}); } 
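For context (an illustrative aside, not from the patch): the TypeTraits indirection used by these templated CSV tests maps a logical type to its singleton DataType and its array class, which is what lets one test body cover both offset widths. The static_asserts below are a sketch of that mapping.

// Sketch: what TypeTraits provides for the two string flavors.
auto narrow = TypeTraits<StringType>::type_singleton();     // == utf8()
auto wide = TypeTraits<LargeStringType>::type_singleton();  // == large_utf8()
static_assert(std::is_same<TypeTraits<StringType>::ArrayType, StringArray>::value,
              "32-bit offsets");
static_assert(
    std::is_same<TypeTraits<LargeStringType>::ArrayType, LargeStringArray>::value,
    "64-bit offsets");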
+TEST(StringConversion, Errors) { TestStringConversionErrors(); } + +TEST(LargeStringConversion, Errors) { TestStringConversionErrors(); } + TEST(FixedSizeBinaryConversion, Basics) { AssertConversion( fixed_size_binary(2), {"ab,cd\n", "gh,ij\n"}, {{"ab", "gh"}, {"cd", "ij"}}); diff --git a/cpp/src/arrow/csv/converter.cc b/cpp/src/arrow/csv/converter.cc index 53495cf9bc3..1c61d3ccbc9 100644 --- a/cpp/src/arrow/csv/converter.cc +++ b/cpp/src/arrow/csv/converter.cc @@ -431,6 +431,7 @@ Status Converter::Make(const std::shared_ptr& type, CONVERTER_CASE(Type::BOOL, BooleanConverter) CONVERTER_CASE(Type::TIMESTAMP, TimestampConverter) CONVERTER_CASE(Type::BINARY, (VarSizeBinaryConverter)) + CONVERTER_CASE(Type::LARGE_BINARY, (VarSizeBinaryConverter)) CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter) CONVERTER_CASE(Type::DECIMAL, DecimalConverter) @@ -442,6 +443,14 @@ Status Converter::Make(const std::shared_ptr& type, } break; + case Type::LARGE_STRING: + if (options.check_utf8) { + result = new VarSizeBinaryConverter(type, options, pool); + } else { + result = new VarSizeBinaryConverter(type, options, pool); + } + break; + default: { return Status::NotImplemented("CSV conversion to ", type->ToString(), " is not supported"); diff --git a/cpp/src/arrow/csv/parser-test.cc b/cpp/src/arrow/csv/parser-test.cc index 36552309b27..d1790b23da1 100644 --- a/cpp/src/arrow/csv/parser-test.cc +++ b/cpp/src/arrow/csv/parser-test.cc @@ -439,6 +439,31 @@ TEST(BlockParser, Escaping) { } } +// Generate test data with the given number of columns. +std::string MakeLotsOfCsvColumns(int32_t num_columns) { + std::string values, header; + header.reserve(num_columns * 10); + values.reserve(num_columns * 10); + for (int x = 0; x < num_columns; x++) { + if (x != 0) { + header += ","; + values += ","; + } + header += "c" + std::to_string(x); + values += std::to_string(x); + } + + header += "\n"; + values += "\n"; + return MakeCSVData({header, values}); +} + +TEST(BlockParser, LotsOfColumns) { + auto options = ParseOptions::Defaults(); + BlockParser parser(options); + AssertParseOk(parser, MakeLotsOfCsvColumns(1024 * 100)); +} + TEST(BlockParser, QuotedEscape) { auto options = ParseOptions::Defaults(); options.escaping = true; diff --git a/cpp/src/arrow/csv/parser.cc b/cpp/src/arrow/csv/parser.cc index a7ca71c9fd7..89c3f4cb168 100644 --- a/cpp/src/arrow/csv/parser.cc +++ b/cpp/src/arrow/csv/parser.cc @@ -397,16 +397,19 @@ Status BlockParser::DoParseSpecialized(const char* start, uint32_t size, bool is return ParseError("Empty CSV file or block: cannot infer number of columns"); } } + while (!finished_parsing && data < data_end && num_rows_ < max_num_rows_) { // We know the number of columns, so can presize a values array for // a given number of rows DCHECK_GE(num_cols_, 0); int32_t rows_in_chunk; + constexpr int32_t kTargetChunkSize = 32768; if (num_cols_ > 0) { - rows_in_chunk = std::min(32768 / num_cols_, max_num_rows_ - num_rows_); + rows_in_chunk = std::min(std::max(kTargetChunkSize / num_cols_, 512), + max_num_rows_ - num_rows_); } else { - rows_in_chunk = std::min(32768, max_num_rows_ - num_rows_); + rows_in_chunk = std::min(kTargetChunkSize, max_num_rows_ - num_rows_); } PresizedValuesWriter values_writer(pool_, rows_in_chunk, num_cols_); diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 4731ed11a06..ec4d179fb9c 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -206,13 +206,13 @@ class BaseTableReader : public csv::TableReader { 
DCHECK_EQ(column_builders_.size(), static_cast(num_cols_)); std::vector> fields; - std::vector> columns; + std::vector> columns; for (int32_t i = 0; i < num_cols_; ++i) { std::shared_ptr array; RETURN_NOT_OK(column_builders_[i]->Finish(&array)); - columns.push_back(std::make_shared(column_names_[i], array)); - fields.push_back(columns.back()->field()); + fields.push_back(::arrow::field(column_names_[i], array->type())); + columns.emplace_back(std::move(array)); } *out = Table::Make(schema(fields), columns); return Status::OK(); diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc index c508dca769f..eb4df587154 100644 --- a/cpp/src/arrow/flight/client.cc +++ b/cpp/src/arrow/flight/client.cc @@ -118,7 +118,7 @@ class GrpcClientAuthReader : public ClientAuthReader { Status Read(std::string* token) override { pb::HandshakeResponse request; if (stream_->Read(&request)) { - *token = std::move(*request.release_payload()); + *token = std::move(*request.mutable_payload()); return Status::OK(); } return internal::FromGrpcStatus(stream_->Finish()); @@ -368,7 +368,7 @@ class GrpcMetadataReader : public FlightMetadataReader { Status ReadMetadata(std::shared_ptr* out) override { pb::PutResult message; if (reader_->Read(&message)) { - *out = Buffer::FromString(std::move(*message.release_app_metadata())); + *out = Buffer::FromString(std::move(*message.mutable_app_metadata())); } else { // Stream finished *out = nullptr; @@ -430,7 +430,12 @@ class FlightClient::FlightClientImpl { GrpcClientAuthSender outgoing{stream}; GrpcClientAuthReader incoming{stream}; RETURN_NOT_OK(auth_handler_->Authenticate(&outgoing, &incoming)); + // Explicitly close our side of the connection + bool finished_writes = stream->WritesDone(); RETURN_NOT_OK(internal::FromGrpcStatus(stream->Finish())); + if (!finished_writes) { + return Status::UnknownError("Could not finish writing before closing"); + } return Status::OK(); } diff --git a/cpp/src/arrow/flight/flight-test.cc b/cpp/src/arrow/flight/flight-test.cc index d036b9af1f0..e37de8be6ec 100644 --- a/cpp/src/arrow/flight/flight-test.cc +++ b/cpp/src/arrow/flight/flight-test.cc @@ -135,7 +135,7 @@ TEST(TestFlightDescriptor, ToFromProto) { } #endif -TEST(TestFlight, StartStopTestServer) { +TEST(TestFlight, DISABLED_StartStopTestServer) { TestServer server("flight-test-server"); server.Start(); ASSERT_TRUE(server.IsRunning()); diff --git a/cpp/src/arrow/flight/internal.cc b/cpp/src/arrow/flight/internal.cc index 55821495e1c..56fc86234a3 100644 --- a/cpp/src/arrow/flight/internal.cc +++ b/cpp/src/arrow/flight/internal.cc @@ -42,6 +42,8 @@ namespace arrow { namespace flight { namespace internal { +const char* kGrpcAuthHeader = "auth-token-bin"; + Status FromGrpcStatus(const grpc::Status& grpc_status) { if (grpc_status.ok()) { return Status::OK(); diff --git a/cpp/src/arrow/flight/internal.h b/cpp/src/arrow/flight/internal.h index 5283bed2183..a554e81ff43 100644 --- a/cpp/src/arrow/flight/internal.h +++ b/cpp/src/arrow/flight/internal.h @@ -64,7 +64,8 @@ namespace flight { namespace internal { /// The name of the header used to pass authentication tokens. 
-static const char* kGrpcAuthHeader = "auth-token-bin"; +ARROW_FLIGHT_EXPORT +extern const char* kGrpcAuthHeader; ARROW_FLIGHT_EXPORT Status SchemaToString(const Schema& schema, std::string* out); diff --git a/cpp/src/arrow/flight/perf-server.cc b/cpp/src/arrow/flight/perf-server.cc index d1131175422..43cede44e0c 100644 --- a/cpp/src/arrow/flight/perf-server.cc +++ b/cpp/src/arrow/flight/perf-server.cc @@ -196,7 +196,7 @@ std::unique_ptr g_server; void Shutdown(int signal) { if (g_server != nullptr) { - g_server->Shutdown(); + ARROW_CHECK_OK(g_server->Shutdown()); } } diff --git a/cpp/src/arrow/flight/server.cc b/cpp/src/arrow/flight/server.cc index d059a8be923..849fbf0c042 100644 --- a/cpp/src/arrow/flight/server.cc +++ b/cpp/src/arrow/flight/server.cc @@ -181,7 +181,7 @@ class GrpcServerAuthReader : public ServerAuthReader { Status Read(std::string* token) override { pb::HandshakeRequest request; if (stream_->Read(&request)) { - *token = std::move(*request.release_payload()); + *token = std::move(*request.mutable_payload()); return Status::OK(); } return Status::IOError("Stream is closed."); @@ -584,9 +584,13 @@ Status FlightServerBase::Serve() { int FlightServerBase::GotSignal() const { return impl_->got_signal_; } -void FlightServerBase::Shutdown() { - DCHECK(impl_->server_); +Status FlightServerBase::Shutdown() { + auto server = impl_->server_.get(); + if (!server) { + return Status::Invalid("Shutdown() on uninitialized FlightServerBase"); + } impl_->server_->Shutdown(); + return Status::OK(); } Status FlightServerBase::ListFlights(const ServerCallContext& context, diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h index 25656e625e1..d53eb4378bf 100644 --- a/cpp/src/arrow/flight/server.h +++ b/cpp/src/arrow/flight/server.h @@ -142,7 +142,7 @@ class ARROW_FLIGHT_EXPORT FlightServerBase { /// thread while Serve() blocks. /// /// TODO(wesm): Shutdown with deadline - void Shutdown(); + Status Shutdown(); // Implement these methods to create your own server. 
The default // implementations will return a not-implemented result to the client diff --git a/cpp/src/arrow/flight/test-util.cc b/cpp/src/arrow/flight/test-util.cc index 4408801a97e..e93e83f02a5 100644 --- a/cpp/src/arrow/flight/test-util.cc +++ b/cpp/src/arrow/flight/test-util.cc @@ -142,7 +142,7 @@ Status InProcessTestServer::Start() { } void InProcessTestServer::Stop() { - server_->Shutdown(); + ARROW_CHECK_OK(server_->Shutdown()); thread_.join(); } diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc index c82e6813648..86aa2237c76 100644 --- a/cpp/src/arrow/flight/types.cc +++ b/cpp/src/arrow/flight/types.cc @@ -31,6 +31,11 @@ namespace arrow { namespace flight { +const char* kSchemeGrpc = "grpc"; +const char* kSchemeGrpcTcp = "grpc+tcp"; +const char* kSchemeGrpcUnix = "grpc+unix"; +const char* kSchemeGrpcTls = "grpc+tls"; + bool FlightDescriptor::Equals(const FlightDescriptor& other) const { if (type != other.type) { return false; diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index abf894c88c8..b4c4c6ca42d 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -135,10 +135,14 @@ struct ARROW_FLIGHT_EXPORT Ticket { class FlightClient; class FlightServerBase; -static const char* kSchemeGrpc = "grpc"; -static const char* kSchemeGrpcTcp = "grpc+tcp"; -static const char* kSchemeGrpcUnix = "grpc+unix"; -static const char* kSchemeGrpcTls = "grpc+tls"; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpc; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpcTcp; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpcUnix; +ARROW_FLIGHT_EXPORT +extern const char* kSchemeGrpcTls; /// \brief A host location (a URI) struct ARROW_FLIGHT_EXPORT Location { diff --git a/cpp/src/arrow/io/file-benchmark.cc b/cpp/src/arrow/io/file-benchmark.cc index 74b92cbf3d6..b0880fdd485 100644 --- a/cpp/src/arrow/io/file-benchmark.cc +++ b/cpp/src/arrow/io/file-benchmark.cc @@ -20,6 +20,8 @@ #include "arrow/io/file.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/io-util.h" +#include "arrow/util/logging.h" +#include "arrow/util/windows_compatibility.h" #include "benchmark/benchmark.h" @@ -30,7 +32,11 @@ #include #include -#ifndef _WIN32 +#ifdef _WIN32 + +#include + +#else #include #include @@ -40,17 +46,82 @@ namespace arrow { -#ifndef _WIN32 - -std::string GetNullFile() { return "/dev/null"; } +std::string GetNullFile() { +#ifdef _WIN32 + return "NUL"; +#else + return "/dev/null"; +#endif +} const std::valarray small_sizes = {8, 24, 33, 1, 32, 192, 16, 40}; const std::valarray large_sizes = {8192, 100000}; constexpr int64_t kBufferSize = 4096; +#ifdef _WIN32 + +class BackgroundReader { + // A class that reads data in the background from a file descriptor + // (Windows implementation) + + public: + static std::shared_ptr StartReader(int fd) { + std::shared_ptr reader(new BackgroundReader(fd)); + reader->worker_.reset(new std::thread([=] { reader->LoopReading(); })); + return reader; + } + void Stop() { ARROW_CHECK(SetEvent(event_)); } + void Join() { worker_->join(); } + + ~BackgroundReader() { + ABORT_NOT_OK(internal::FileClose(fd_)); + ARROW_CHECK(CloseHandle(event_)); + } + + protected: + explicit BackgroundReader(int fd) : fd_(fd), total_bytes_(0) { + file_handle_ = reinterpret_cast(_get_osfhandle(fd)); + ARROW_CHECK_NE(file_handle_, INVALID_HANDLE_VALUE); + event_ = + CreateEvent(nullptr, /* bManualReset=*/TRUE, /* bInitialState=*/FALSE, nullptr); + ARROW_CHECK_NE(event_, INVALID_HANDLE_VALUE); + } + + void LoopReading() { + 
const HANDLE handles[] = {file_handle_, event_}; + while (true) { + DWORD ret = WaitForMultipleObjects(2, handles, /* bWaitAll=*/FALSE, INFINITE); + ARROW_CHECK_NE(ret, WAIT_FAILED); + if (ret == WAIT_OBJECT_0 + 1) { + // Got stop request + break; + } else if (ret == WAIT_OBJECT_0) { + // File ready for reading + int64_t bytes_read; + ARROW_CHECK_OK(internal::FileRead(fd_, buffer_, buffer_size_, &bytes_read)); + total_bytes_ += bytes_read; + } else { + ARROW_LOG(FATAL) << "Unexpected WaitForMultipleObjects return value " << ret; + } + } + } + + int fd_; + HANDLE file_handle_, event_; + int64_t total_bytes_; + + static const int64_t buffer_size_ = 16384; + uint8_t buffer_[buffer_size_]; + + std::unique_ptr worker_; +}; + +#else + class BackgroundReader { // A class that reads data in the background from a file descriptor + // (Unix implementation) public: static std::shared_ptr StartReader(int fd) { @@ -116,6 +187,8 @@ class BackgroundReader { std::unique_ptr worker_; }; +#endif + // Set up a pipe with an OutputStream at one end and a BackgroundReader at // the other end. static void SetupPipeWriter(std::shared_ptr* stream, @@ -139,6 +212,9 @@ static void BenchmarkStreamingWrites(benchmark::State& state, ABORT_NOT_OK(stream->Write(data, size)); } } + // For Windows: need to close writer before joining reader thread. + ABORT_NOT_OK(stream->Close()); + const int64_t total_bytes = static_cast(state.iterations()) * sum_sizes; state.SetBytesProcessed(total_bytes); @@ -147,7 +223,6 @@ static void BenchmarkStreamingWrites(benchmark::State& state, reader->Stop(); reader->Join(); } - ABORT_NOT_OK(stream->Close()); } // Benchmark writing to /dev/null @@ -232,6 +307,4 @@ BENCHMARK(BufferedOutputStreamSmallWritesToNull)->UseRealTime(); BENCHMARK(BufferedOutputStreamSmallWritesToPipe)->UseRealTime(); BENCHMARK(BufferedOutputStreamLargeWritesToPipe)->UseRealTime(); -#endif // ifndef _WIN32 - } // namespace arrow diff --git a/cpp/src/arrow/io/hdfs-internal.cc b/cpp/src/arrow/io/hdfs-internal.cc index c273ab45f63..c4d6c9c5082 100644 --- a/cpp/src/arrow/io/hdfs-internal.cc +++ b/cpp/src/arrow/io/hdfs-internal.cc @@ -162,6 +162,8 @@ static std::vector get_potential_libjvm_paths() { "/usr/local/lib/jvm/java", // alt rhel6 "/usr/local/lib/jvm", // alt centos6 "/usr/local/lib64/jvm", // alt opensuse 13 + "/usr/local/lib/jvm/java-8-openjdk-amd64", // alt ubuntu / debian distros + "/usr/lib/jvm/java-8-openjdk-amd64", // alt ubuntu / debian distros "/usr/local/lib/jvm/java-7-openjdk-amd64", // alt ubuntu / debian distros "/usr/lib/jvm/java-7-openjdk-amd64", // alt ubuntu / debian distros "/usr/local/lib/jvm/java-6-openjdk-amd64", // alt ubuntu / debian distros diff --git a/cpp/src/arrow/io/mman.h b/cpp/src/arrow/io/mman.h index 61254925609..5826891a60b 100644 --- a/cpp/src/arrow/io/mman.h +++ b/cpp/src/arrow/io/mman.h @@ -14,6 +14,8 @@ #include #include +#include + #define PROT_NONE 0 #define PROT_READ 1 #define PROT_WRITE 2 @@ -75,28 +77,17 @@ static inline void* mmap(void* addr, size_t len, int prot, int flags, int fildes HANDLE fm, h; void* map = MAP_FAILED; + const uint64_t off64 = static_cast(off); + const uint64_t maxSize = off64 + len; -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4293) -#endif + const DWORD dwFileOffsetLow = static_cast(off64 & 0xFFFFFFFFUL); + const DWORD dwFileOffsetHigh = static_cast((off64 >> 32) & 0xFFFFFFFFUL); + const DWORD dwMaxSizeLow = static_cast(maxSize & 0xFFFFFFFFUL); + const DWORD dwMaxSizeHigh = static_cast((maxSize >> 32) & 0xFFFFFFFFUL); - const DWORD 
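A note on the reordering in `BenchmarkStreamingWrites` above: on Windows the background reader only observes end-of-stream once the write end of the pipe is closed, so `Close()` must happen before `Join()`. A sketch of the required teardown order (`TearDownPipe` is an illustrative helper, assuming the `BackgroundReader` from file-benchmark.cc):

```cpp
#include "arrow/io/interfaces.h"
#include "arrow/testing/gtest_util.h"  // ABORT_NOT_OK, as used by the benchmark

void TearDownPipe(const std::shared_ptr<arrow::io::OutputStream>& stream,
                  const std::shared_ptr<BackgroundReader>& reader) {
  ABORT_NOT_OK(stream->Close());  // unblocks a reader waiting on the pipe
  reader->Stop();                 // signal the stop event
  reader->Join();                 // join the worker thread, now guaranteed to exit
}
```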
dwFileOffsetLow = - (sizeof(off_t) <= sizeof(DWORD)) ? (DWORD)off : (DWORD)(off & 0xFFFFFFFFL); - const DWORD dwFileOffsetHigh = - (sizeof(off_t) <= sizeof(DWORD)) ? (DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL); const DWORD protect = __map_mmap_prot_page(prot); const DWORD desiredAccess = __map_mmap_prot_file(prot); - const size_t maxSize = off + len; - - const DWORD dwMaxSizeLow = static_cast(maxSize & 0xFFFFFFFFL); - const DWORD dwMaxSizeHigh = static_cast((maxSize >> 32) & 0xFFFFFFFFL); - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - errno = 0; if (len == 0 diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc index 001e36ac0df..af68a5dc929 100644 --- a/cpp/src/arrow/ipc/feather-test.cc +++ b/cpp/src/arrow/ipc/feather-test.cc @@ -425,14 +425,14 @@ TEST_F(TestTableWriter, PrimitiveRoundTrip) { ASSERT_OK(writer_->Append("f1", *batch->column(1))); Finish(); - std::shared_ptr col; + std::shared_ptr col; ASSERT_OK(reader_->GetColumn(0, &col)); - ASSERT_TRUE(col->data()->chunk(0)->Equals(batch->column(0))); - ASSERT_EQ("f0", col->name()); + ASSERT_TRUE(col->chunk(0)->Equals(batch->column(0))); + ASSERT_EQ("f0", reader_->GetColumnName(0)); ASSERT_OK(reader_->GetColumn(1, &col)); - ASSERT_TRUE(col->data()->chunk(0)->Equals(batch->column(1))); - ASSERT_EQ("f1", col->name()); + ASSERT_TRUE(col->chunk(0)->Equals(batch->column(1))); + ASSERT_EQ("f1", reader_->GetColumnName(1)); } TEST_F(TestTableWriter, CategoryRoundtrip) { @@ -502,14 +502,14 @@ TEST_F(TestTableWriter, PrimitiveNullRoundTrip) { } Finish(); - std::shared_ptr col; + std::shared_ptr col; for (int i = 0; i < batch->num_columns(); ++i) { ASSERT_OK(reader_->GetColumn(i, &col)); - ASSERT_EQ(batch->column_name(i), col->name()); + ASSERT_EQ(batch->column_name(i), reader_->GetColumnName(i)); StringArray str_values(batch->column(i)->length(), nullptr, nullptr, batch->column(i)->null_bitmap(), batch->column(i)->null_count()); - CheckArrays(str_values, *col->data()->chunk(0)); + CheckArrays(str_values, *col->chunk(0)); } } @@ -527,14 +527,14 @@ class TestTableWriterSlice : public TestTableWriter, ASSERT_OK(writer_->Append("f1", *batch->column(1))); Finish(); - std::shared_ptr col; + std::shared_ptr col; ASSERT_OK(reader_->GetColumn(0, &col)); - ASSERT_TRUE(col->data()->chunk(0)->Equals(batch->column(0))); - ASSERT_EQ("f0", col->name()); + ASSERT_TRUE(col->chunk(0)->Equals(batch->column(0))); + ASSERT_EQ("f0", reader_->GetColumnName(0)); ASSERT_OK(reader_->GetColumn(1, &col)); - ASSERT_TRUE(col->data()->chunk(0)->Equals(batch->column(1))); - ASSERT_EQ("f1", col->name()); + ASSERT_TRUE(col->chunk(0)->Equals(batch->column(1))); + ASSERT_EQ("f1", reader_->GetColumnName(1)); } }; diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc index 5965d361631..8436bd205b5 100644 --- a/cpp/src/arrow/ipc/feather.cc +++ b/cpp/src/arrow/ipc/feather.cc @@ -367,6 +367,8 @@ class TableReader::TableReaderImpl { PRIMITIVE_CASE(DOUBLE, float64); PRIMITIVE_CASE(UTF8, utf8); PRIMITIVE_CASE(BINARY, binary); + PRIMITIVE_CASE(LARGE_UTF8, large_utf8); + PRIMITIVE_CASE(LARGE_BINARY, large_binary); default: return Status::Invalid("Unrecognized type"); } @@ -410,6 +412,10 @@ class TableReader::TableReaderImpl { int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int32_t)); buffers.push_back(SliceBuffer(buffer, offset, offsets_size)); offset += offsets_size; + } else if (is_large_binary_like(type->id())) { + int64_t offsets_size = GetOutputLength((meta->length() + 1) * sizeof(int64_t)); + 
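The feather-test.cc changes above show the shape of the reworked reader API. A hypothetical end-to-end use, assuming an `ipc::feather::TableReader` named `reader` and an `expected_array` to compare against:

```cpp
// Columns now come back as ChunkedArray; the name is fetched separately.
std::shared_ptr<arrow::ChunkedArray> col;
ARROW_CHECK_OK(reader->GetColumn(0, &col));
std::string name = reader->GetColumnName(0);
bool same = col->chunk(0)->Equals(expected_array);
```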
buffers.push_back(SliceBuffer(buffer, offset, offsets_size)); + offset += offsets_size; } buffers.push_back(SliceBuffer(buffer, offset, buffer->size() - offset)); @@ -434,7 +440,7 @@ class TableReader::TableReaderImpl { return col_meta->name()->str(); } - Status GetColumn(int i, std::shared_ptr* out) { + Status GetColumn(int i, std::shared_ptr* out) { const fbs::Column* col_meta = metadata_->column(i); // auto user_meta = column->user_metadata(); @@ -443,18 +449,18 @@ class TableReader::TableReaderImpl { std::shared_ptr values; RETURN_NOT_OK(LoadValues(col_meta->values(), col_meta->metadata_type(), col_meta->metadata(), &values)); - out->reset(new Column(col_meta->name()->str(), values)); + *out = std::make_shared(values); return Status::OK(); } Status Read(std::shared_ptr* out) { std::vector> fields; - std::vector> columns; + std::vector> columns; for (int i = 0; i < num_columns(); ++i) { - std::shared_ptr column; + std::shared_ptr column; RETURN_NOT_OK(GetColumn(i, &column)); columns.push_back(column); - fields.push_back(column->field()); + fields.push_back(::arrow::field(GetColumnName(i), column->type())); } *out = Table::Make(schema(fields), columns); return Status::OK(); @@ -462,7 +468,7 @@ class TableReader::TableReaderImpl { Status Read(const std::vector& indices, std::shared_ptr
* out) { std::vector> fields; - std::vector> columns; + std::vector> columns; for (int i = 0; i < num_columns(); ++i) { bool found = false; for (auto j : indices) { @@ -474,10 +480,10 @@ class TableReader::TableReaderImpl { if (!found) { continue; } - std::shared_ptr column; + std::shared_ptr column; RETURN_NOT_OK(GetColumn(i, &column)); columns.push_back(column); - fields.push_back(column->field()); + fields.push_back(::arrow::field(GetColumnName(i), column->type())); } *out = Table::Make(schema(fields), columns); return Status::OK(); @@ -485,7 +491,7 @@ class TableReader::TableReaderImpl { Status Read(const std::vector& names, std::shared_ptr
* out) { std::vector> fields; - std::vector> columns; + std::vector> columns; for (int i = 0; i < num_columns(); ++i) { auto name = GetColumnName(i); bool found = false; @@ -498,10 +504,10 @@ class TableReader::TableReaderImpl { if (!found) { continue; } - std::shared_ptr column; + std::shared_ptr column; RETURN_NOT_OK(GetColumn(i, &column)); columns.push_back(column); - fields.push_back(column->field()); + fields.push_back(::arrow::field(name, column->type())); } *out = Table::Make(schema(fields), columns); return Status::OK(); @@ -539,7 +545,7 @@ int64_t TableReader::num_columns() const { return impl_->num_columns(); } std::string TableReader::GetColumnName(int i) const { return impl_->GetColumnName(i); } -Status TableReader::GetColumn(int i, std::shared_ptr* out) { +Status TableReader::GetColumn(int i, std::shared_ptr* out) { return impl_->GetColumn(i, out); } @@ -585,6 +591,10 @@ fbs::Type ToFlatbufferType(Type::type type) { return fbs::Type_UTF8; case Type::BINARY: return fbs::Type_BINARY; + case Type::LARGE_STRING: + return fbs::Type_LARGE_UTF8; + case Type::LARGE_BINARY: + return fbs::Type_LARGE_BINARY; case Type::DATE32: return fbs::Type_INT32; case Type::TIMESTAMP: @@ -644,7 +654,8 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { } Status LoadArrayMetadata(const Array& values, ArrayMetadata* meta) { - if (!(is_primitive(values.type_id()) || is_binary_like(values.type_id()))) { + if (!(is_primitive(values.type_id()) || is_binary_like(values.type_id()) || + is_large_binary_like(values.type_id()))) { return Status::Invalid("Array is not primitive type: ", values.type()->ToString()); } @@ -659,6 +670,32 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { return Status::OK(); } + template + Status WriteBinaryArray(const ArrayType& values, ArrayMetadata* meta, + const uint8_t** values_buffer, int64_t* values_bytes, + int64_t* bytes_written) { + using offset_type = typename ArrayType::offset_type; + + int64_t offset_bytes = sizeof(offset_type) * (values.length() + 1); + + if (values.value_offsets()) { + *values_bytes = values.raw_value_offsets()[values.length()]; + + // Write the variable-length offsets + RETURN_NOT_OK(WritePadded( + stream_.get(), reinterpret_cast(values.raw_value_offsets()), + offset_bytes, bytes_written)); + } else { + RETURN_NOT_OK(WritePaddedBlank(stream_.get(), offset_bytes, bytes_written)); + } + meta->total_bytes += *bytes_written; + + if (values.value_data()) { + *values_buffer = values.value_data()->data(); + } + return Status::OK(); + } + Status WriteArray(const Array& values, ArrayMetadata* meta) { RETURN_NOT_OK(CheckStarted()); RETURN_NOT_OK(LoadArrayMetadata(values, meta)); @@ -687,26 +724,11 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { const uint8_t* values_buffer = nullptr; if (is_binary_like(values.type_id())) { - const auto& bin_values = checked_cast(values); - - int64_t offset_bytes = sizeof(int32_t) * (values.length() + 1); - - if (bin_values.value_offsets()) { - values_bytes = bin_values.raw_value_offsets()[values.length()]; - - // Write the variable-length offsets - RETURN_NOT_OK( - WritePadded(stream_.get(), - reinterpret_cast(bin_values.raw_value_offsets()), - offset_bytes, &bytes_written)); - } else { - RETURN_NOT_OK(WritePaddedBlank(stream_.get(), offset_bytes, &bytes_written)); - } - meta->total_bytes += bytes_written; - - if (bin_values.value_data()) { - values_buffer = bin_values.value_data()->data(); - } + RETURN_NOT_OK(WriteBinaryArray(checked_cast(values), meta, + &values_buffer, &values_bytes, 
&bytes_written)); + } else if (is_large_binary_like(values.type_id())) { + RETURN_NOT_OK(WriteBinaryArray(checked_cast(values), meta, + &values_buffer, &values_bytes, &bytes_written)); } else { const auto& prim_values = checked_cast(values); const auto& fw_type = checked_cast(*values.type()); @@ -760,6 +782,8 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { VISIT_PRIMITIVE(DoubleArray) VISIT_PRIMITIVE(BinaryArray) VISIT_PRIMITIVE(StringArray) + VISIT_PRIMITIVE(LargeBinaryArray) + VISIT_PRIMITIVE(LargeStringArray) #undef VISIT_PRIMITIVE @@ -813,9 +837,8 @@ class TableWriter::TableWriterImpl : public ArrayVisitor { Status Write(const Table& table) { for (int i = 0; i < table.num_columns(); ++i) { auto column = table.column(i); - current_column_ = metadata_.AddColumn(column->name()); - auto chunked_array = column->data(); - for (const auto chunk : chunked_array->chunks()) { + current_column_ = metadata_.AddColumn(table.field(i)->name()); + for (const auto chunk : column->chunks()) { RETURN_NOT_OK(chunk->Accept(this)); } RETURN_NOT_OK(current_column_->Finish()); diff --git a/cpp/src/arrow/ipc/feather.fbs b/cpp/src/arrow/ipc/feather.fbs index a27d39989c6..5ec06299864 100644 --- a/cpp/src/arrow/ipc/feather.fbs +++ b/cpp/src/arrow/ipc/feather.fbs @@ -48,7 +48,10 @@ enum Type : byte { TIMESTAMP = 14, DATE = 15, - TIME = 16 + TIME = 16, + + LARGE_UTF8 = 17, + LARGE_BINARY = 18 } enum Encoding : byte { diff --git a/cpp/src/arrow/ipc/feather.h b/cpp/src/arrow/ipc/feather.h index b6bd4ff5e5b..c4b5f6b2718 100644 --- a/cpp/src/arrow/ipc/feather.h +++ b/cpp/src/arrow/ipc/feather.h @@ -31,7 +31,7 @@ namespace arrow { class Array; -class Column; +class ChunkedArray; class Status; class Table; @@ -84,14 +84,14 @@ class ARROW_EXPORT TableReader { std::string GetColumnName(int i) const; - /// \brief Read a column from the file as an arrow::Column. + /// \brief Read a column from the file as an arrow::ChunkedArray. /// /// \param[in] i the column index to read /// \param[out] out the returned column /// \return Status /// /// This function is zero-copy if the file source supports zero-copy reads - Status GetColumn(int i, std::shared_ptr* out); + Status GetColumn(int i, std::shared_ptr* out); /// \brief Read all columns from the file as an arrow::Table. /// diff --git a/cpp/src/arrow/ipc/json-internal.cc b/cpp/src/arrow/ipc/json-internal.cc index 135296551c9..49a884e1f88 100644 --- a/cpp/src/arrow/ipc/json-internal.cc +++ b/cpp/src/arrow/ipc/json-internal.cc @@ -312,6 +312,10 @@ class SchemaWriter { Status Visit(const TimeType& type) { return WritePrimitive("time", type); } Status Visit(const StringType& type) { return WriteVarBytes("utf8", type); } Status Visit(const BinaryType& type) { return WriteVarBytes("binary", type); } + Status Visit(const LargeStringType& type) { return WriteVarBytes("large_utf8", type); } + Status Visit(const LargeBinaryType& type) { + return WriteVarBytes("large_binary", type); + } Status Visit(const FixedSizeBinaryType& type) { return WritePrimitive("fixedsizebinary", type); } @@ -430,20 +434,26 @@ class ArrayWriter { } } - // Binary, encode to hexadecimal. UTF8 string write as is + // Binary, encode to hexadecimal. 
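The reason `WriteBinaryArray` above became a template is the offset width: `BinaryArray` stores 32-bit value offsets while `LargeBinaryArray` stores 64-bit ones, and `N` values are preceded by `N + 1` offsets. A small illustration of the size computation (`OffsetBytes` is an illustrative helper, not part of the patch):

```cpp
#include <cstdint>

#include "arrow/array.h"

template <typename ArrayType>
int64_t OffsetBytes(int64_t num_values) {
  using offset_type = typename ArrayType::offset_type;
  return static_cast<int64_t>(sizeof(offset_type)) * (num_values + 1);
}

// OffsetBytes<arrow::BinaryArray>(500)      == 2004  (int32_t offsets)
// OffsetBytes<arrow::LargeBinaryArray>(500) == 4008  (int64_t offsets)
```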
 template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryArray, T>::value, void>::type
+  typename std::enable_if<std::is_same<BinaryArray, T>::value ||
+                              std::is_same<LargeBinaryArray, T>::value,
+                          void>::type
   WriteDataValues(const T& arr) {
     for (int64_t i = 0; i < arr.length(); ++i) {
-      int32_t length;
-      const uint8_t* buf = arr.GetValue(i, &length);
+      writer_->String(HexEncode(arr.GetView(i)));
+    }
+  }
-
-      if (std::is_base_of<StringArray, T>::value) {
-        // Presumed UTF-8
-        writer_->String(reinterpret_cast<const char*>(buf), length);
-      } else {
-        writer_->String(HexEncode(buf, length));
-      }
+  // UTF8 string, write as is
+  template <typename T>
+  typename std::enable_if<std::is_same<StringArray, T>::value ||
+                              std::is_same<LargeStringArray, T>::value,
+                          void>::type
+  WriteDataValues(const T& arr) {
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      auto view = arr.GetView(i);
+      writer_->String(view.data(), static_cast<rj::SizeType>(view.size()));
     }
   }
@@ -558,8 +568,10 @@ class ArrayWriter {
   }
 
   template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryArray, T>::value, Status>::type Visit(
-      const T& array) {
+  typename std::enable_if<std::is_base_of<BinaryArray, T>::value ||
+                              std::is_base_of<LargeBinaryArray, T>::value,
+                          Status>::type
+  Visit(const T& array) {
     WriteValidityField(array);
     WriteIntegerField("OFFSET", array.raw_value_offsets(), array.length() + 1);
     WriteDataField(array);
@@ -911,6 +923,10 @@ static Status GetType(const RjObject& json_type,
     *type = utf8();
   } else if (type_name == "binary") {
     *type = binary();
+  } else if (type_name == "large_utf8") {
+    *type = large_utf8();
+  } else if (type_name == "large_binary") {
+    *type = large_binary();
   } else if (type_name == "fixedsizebinary") {
     return GetFixedSizeBinary(json_type, type);
   } else if (type_name == "decimal") {
@@ -1091,9 +1107,10 @@ class ArrayReader {
   }
 
   template <typename T>
-  typename std::enable_if<std::is_base_of<BinaryType, T>::value, Status>::type Visit(
+  typename std::enable_if<is_base_binary_type<T>::value, Status>::type Visit(
       const T& type) {
     typename TypeTraits<T>::BuilderType builder(pool_);
+    using offset_type = typename T::offset_type;
 
     const auto& json_data = obj_.FindMember(kData);
     RETURN_NOT_ARRAY(kData, json_data, obj_);
@@ -1110,23 +1127,27 @@ class ArrayReader {
       const rj::Value& val = json_data_arr[i];
       DCHECK(val.IsString());
-      if (std::is_base_of<StringType, T>::value) {
+
+      if (T::is_utf8) {
         RETURN_NOT_OK(builder.Append(val.GetString()));
       } else {
         std::string hex_string = val.GetString();
-        DCHECK(hex_string.size() % 2 == 0) << "Expected base16 hex string";
-        int32_t length = static_cast<int32_t>(hex_string.size()) / 2;
+        if (hex_string.size() % 2 != 0) {
+          return Status::Invalid("Expected base16 hex string");
+        }
+        const auto value_len = static_cast<int64_t>(hex_string.size()) / 2;
 
         std::shared_ptr<Buffer> byte_buffer;
-        RETURN_NOT_OK(AllocateBuffer(pool_, length, &byte_buffer));
+        RETURN_NOT_OK(AllocateBuffer(pool_, value_len, &byte_buffer));
 
         const char* hex_data = hex_string.c_str();
         uint8_t* byte_buffer_data = byte_buffer->mutable_data();
-        for (int32_t j = 0; j < length; ++j) {
+        for (int64_t j = 0; j < value_len; ++j) {
           RETURN_NOT_OK(ParseHexValue(hex_data + j * 2, &byte_buffer_data[j]));
         }
-        RETURN_NOT_OK(builder.Append(byte_buffer_data, length));
+        RETURN_NOT_OK(
+            builder.Append(byte_buffer_data, static_cast<offset_type>(value_len)));
       }
     }
diff --git a/cpp/src/arrow/ipc/json-simple-test.cc b/cpp/src/arrow/ipc/json-simple-test.cc
index 772557b12bd..b5f68e0c7cb 100644
--- a/cpp/src/arrow/ipc/json-simple-test.cc
+++ b/cpp/src/arrow/ipc/json-simple-test.cc
@@ -322,6 +322,21 @@ TEST(TestString, Basics) {
   AssertJSONArray<StringType, std::string>(type, "[\"\\u0000\\u001f\"]", {s});
 }
 
+TEST(TestLargeString, Basics) {
+  // Similar to TestString above, only testing the basics
+  std::shared_ptr<DataType> type = large_utf8();
+  std::shared_ptr<Array> expected, actual;
+
+  AssertJSONArray<LargeStringType, std::string>(type, "[\"\", \"foo\"]", {"", "foo"});
+
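The `ArrayReader` change above turns a debug-only `DCHECK` into a real error, so malformed integration JSON now fails cleanly in release builds as well. A standalone sketch of the same decoding loop (`DecodeHex` is an illustrative wrapper around the `ParseHexValue` helper the reader uses):

```cpp
#include <cstdint>
#include <string>
#include <vector>

#include "arrow/status.h"
#include "arrow/util/string.h"  // assumed home of ParseHexValue

namespace arrow {

Status DecodeHex(const std::string& hex, std::vector<uint8_t>* out) {
  if (hex.size() % 2 != 0) {
    return Status::Invalid("Expected base16 hex string");
  }
  out->resize(hex.size() / 2);
  for (size_t i = 0; i < out->size(); ++i) {
    // Each pair of hex characters decodes to one byte.
    ARROW_RETURN_NOT_OK(ParseHexValue(hex.data() + 2 * i, &(*out)[i]));
  }
  return Status::OK();
}

}  // namespace arrow
```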
AssertJSONArray(type, "[\"\", null]", {true, false}, + {"", ""}); + + // Large binary type + type = large_binary(); + AssertJSONArray(type, "[\"\", \"foo\", null]", + {true, true, false}, {"", "foo", ""}); +} + TEST(TestTimestamp, Basics) { // Timestamp type auto type = timestamp(TimeUnit::SECOND); @@ -905,6 +920,196 @@ TEST(TestStruct, Errors) { ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"c\": 0}]", &array)); } +TEST(TestDenseUnion, Basics) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + + auto type = union_({field_a, field_b}, {4, 8}, UnionMode::DENSE); + auto array = ArrayFromJSON(type, "[[4, 122], [8, true], [4, null], null, [8, false]]"); + + auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]"); + auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]"); + auto expected_a = ArrayFromJSON(int8(), "[122, null]"); + auto expected_b = ArrayFromJSON(boolean(), "[true, false]"); + + std::shared_ptr expected; + ASSERT_OK(UnionArray::MakeDense(*expected_types, *expected_offsets, + {expected_a, expected_b}, {"a", "b"}, {4, 8}, + &expected)); + + ASSERT_ARRAYS_EQUAL(*expected, *array); +} + +TEST(TestSparseUnion, Basics) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + + auto type = union_({field_a, field_b}, {4, 8}, UnionMode::SPARSE); + auto array = ArrayFromJSON(type, "[[4, 122], [8, true], [4, null], null, [8, false]]"); + + auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]"); + auto expected_a = ArrayFromJSON(int8(), "[122, null, null, null, null]"); + auto expected_b = ArrayFromJSON(boolean(), "[null, true, null, null, false]"); + + std::shared_ptr expected; + ASSERT_OK(UnionArray::MakeSparse(*expected_types, {expected_a, expected_b}, {"a", "b"}, + {4, 8}, &expected)); + + ASSERT_ARRAYS_EQUAL(*expected, *array); +} + +TEST(TestDenseUnion, ListOfUnion) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + auto union_type = union_({field_a, field_b}, {4, 8}, UnionMode::DENSE); + auto list_type = list(union_type); + auto array = ArrayFromJSON(list_type, + "[" + "[[4, 122], [8, true]]," + "[[4, null], null, [8, false]]" + "]"); + + auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]"); + auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]"); + auto expected_a = ArrayFromJSON(int8(), "[122, null]"); + auto expected_b = ArrayFromJSON(boolean(), "[true, false]"); + + std::shared_ptr expected_values, expected; + ASSERT_OK(UnionArray::MakeDense(*expected_types, *expected_offsets, + {expected_a, expected_b}, {"a", "b"}, {4, 8}, + &expected_values)); + auto expected_list_offsets = ArrayFromJSON(int32(), "[0, 2, 5]"); + ASSERT_OK(ListArray::FromArrays(*expected_list_offsets, *expected_values, + default_memory_pool(), &expected)); + + ASSERT_ARRAYS_EQUAL(*expected, *array); +} + +TEST(TestSparseUnion, ListOfUnion) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + auto union_type = union_({field_a, field_b}, {4, 8}, UnionMode::SPARSE); + auto list_type = list(union_type); + auto array = ArrayFromJSON(list_type, + "[" + "[[4, 122], [8, true]]," + "[[4, null], null, [8, false]]" + "]"); + + auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]"); + auto expected_a = ArrayFromJSON(int8(), "[122, null, null, null, null]"); + auto expected_b = ArrayFromJSON(boolean(), "[null, true, null, null, false]"); + + std::shared_ptr expected_values, expected; + ASSERT_OK(UnionArray::MakeSparse(*expected_types, {expected_a, 
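Stepping back from the individual tests: the JSON notation introduced here encodes each union element as a `[type_id, value]` pair, or `null` for a null element. The two modes differ only in child layout: dense children hold just the referenced values, sparse children are full length and null-padded. A condensed sketch using the same helpers the tests use:

```cpp
// Type codes {4, 8} map to children "a" (int8) and "b" (boolean).
auto type = union_({field("a", int8()), field("b", boolean())}, {4, 8},
                   UnionMode::DENSE);
// Dense: child "a" ends up as [122], child "b" as [true].
// With UnionMode::SPARSE both children would be length 3, padded with nulls.
auto array = ArrayFromJSON(type, "[[4, 122], [8, true], null]");
```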
expected_b}, {"a", "b"}, + {4, 8}, &expected_values)); + auto expected_list_offsets = ArrayFromJSON(int32(), "[0, 2, 5]"); + ASSERT_OK(ListArray::FromArrays(*expected_list_offsets, *expected_values, + default_memory_pool(), &expected)); + + ASSERT_ARRAYS_EQUAL(*expected, *array); +} + +TEST(TestDenseUnion, UnionOfStructs) { + std::vector> fields = { + field("ab", struct_({field("alpha", float64()), field("bravo", utf8())})), + field("wtf", struct_({field("whiskey", int8()), field("tango", float64()), + field("foxtrot", list(int8()))})), + field("q", struct_({field("quebec", utf8())}))}; + auto type = union_(fields, {0, 23, 47}, UnionMode::DENSE); + auto array = ArrayFromJSON(type, R"([ + [0, {"alpha": 0.0, "bravo": "charlie"}], + [23, {"whiskey": 99}], + [0, {"bravo": "mike"}], + null, + [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}] + ])"); + + auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]"); + auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]"); + ArrayVector expected_fields = {ArrayFromJSON(fields[0]->type(), R"([ + {"alpha": 0.0, "bravo": "charlie"}, + {"bravo": "mike"} + ])"), + ArrayFromJSON(fields[1]->type(), R"([ + {"whiskey": 99}, + {"tango": 8.25, "foxtrot": [0, 2, 3]} + ])"), + ArrayFromJSON(fields[2]->type(), "[]")}; + + std::shared_ptr expected; + ASSERT_OK(UnionArray::MakeDense(*expected_types, *expected_offsets, expected_fields, + {"ab", "wtf", "q"}, {0, 23, 47}, &expected)); + + ASSERT_ARRAYS_EQUAL(*expected, *array); +} + +TEST(TestSparseUnion, UnionOfStructs) { + std::vector> fields = { + field("ab", struct_({field("alpha", float64()), field("bravo", utf8())})), + field("wtf", struct_({field("whiskey", int8()), field("tango", float64()), + field("foxtrot", list(int8()))})), + field("q", struct_({field("quebec", utf8())}))}; + auto type = union_(fields, {0, 23, 47}, UnionMode::SPARSE); + auto array = ArrayFromJSON(type, R"([ + [0, {"alpha": 0.0, "bravo": "charlie"}], + [23, {"whiskey": 99}], + [0, {"bravo": "mike"}], + null, + [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}] + ])"); + + auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]"); + ArrayVector expected_fields = { + ArrayFromJSON(fields[0]->type(), R"([ + {"alpha": 0.0, "bravo": "charlie"}, + null, + {"bravo": "mike"}, + null, + null + ])"), + ArrayFromJSON(fields[1]->type(), R"([ + null, + {"whiskey": 99}, + null, + null, + {"tango": 8.25, "foxtrot": [0, 2, 3]} + ])"), + ArrayFromJSON(fields[2]->type(), "[null, null, null, null, null]")}; + + std::shared_ptr expected; + ASSERT_OK(UnionArray::MakeSparse(*expected_types, expected_fields, {"ab", "wtf", "q"}, + {0, 23, 47}, &expected)); + + ASSERT_ARRAYS_EQUAL(*expected, *array); +} + +TEST(TestDenseUnion, Errors) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + std::shared_ptr type = union_({field_a, field_b}, {4, 8}, UnionMode::DENSE); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 8]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[4, \"\"]]", &array)); + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]", &array)); +} + +TEST(TestSparseUnion, Errors) { + auto field_a = field("a", int8()); + auto field_b = field("b", boolean()); + std::shared_ptr type = union_({field_a, field_b}, {4, 8}, UnionMode::SPARSE); + std::shared_ptr array; + + ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]", &array)); + 
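A gloss on why each rejected input fails, given the `[type_id, value]` convention (the same reasoning applies to the sparse variant that follows):

```cpp
// "[\"\"]"          -> element is a string, not a [type_id, value] array
// "[[0, 8]]"        -> type_id 0 is not among the declared codes {4, 8}
// "[[0]]"           -> wrong arity: a pair is required, got one element
// "[[4, \"\"]]"     -> "" cannot be converted to the int8 child "a"
// "[[8, true, 1]]"  -> wrong arity: three elements instead of two
```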
ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 8]]", &array));
+  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]", &array));
+  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[4, \"\"]]", &array));
+  ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]", &array));
+}
+
 }  // namespace json
 }  // namespace internal
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/json-simple.cc b/cpp/src/arrow/ipc/json-simple.cc
index f850f3d2b06..ce0d2c53c63 100644
--- a/cpp/src/arrow/ipc/json-simple.cc
+++ b/cpp/src/arrow/ipc/json-simple.cc
@@ -26,6 +26,7 @@
 #include "arrow/ipc/json-internal.h"
 #include "arrow/ipc/json-simple.h"
 #include "arrow/memory_pool.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
@@ -344,11 +345,14 @@ class TimestampConverter final : public ConcreteConverter<TimestampConverter> {
 // ------------------------------------------------------------------------
 // Converter for binary and string arrays
 
-class StringConverter final : public ConcreteConverter<StringConverter> {
+template <typename TYPE>
+class StringConverter final : public ConcreteConverter<StringConverter<TYPE>> {
  public:
+  using BuilderType = typename TypeTraits<TYPE>::BuilderType;
+
   explicit StringConverter(const std::shared_ptr<DataType>& type) {
     this->type_ = type;
-    builder_ = std::make_shared<StringBuilder>(type, default_memory_pool());
+    builder_ = std::make_shared<BuilderType>(type, default_memory_pool());
   }
 
   Status AppendNull() override { return builder_->AppendNull(); }
@@ -368,7 +372,7 @@ class StringConverter final : public ConcreteConverter<StringConverter> {
   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
 
  private:
-  std::shared_ptr<StringBuilder> builder_;
+  std::shared_ptr<BuilderType> builder_;
 };
 
 // ------------------------------------------------------------------------
@@ -610,6 +614,94 @@ class StructConverter final : public ConcreteConverter<StructConverter> {
   std::vector<std::shared_ptr<Converter>> child_converters_;
 };
 
+// ------------------------------------------------------------------------
+// Converter for union arrays
+
+class UnionConverter final : public ConcreteConverter<UnionConverter> {
+ public:
+  explicit UnionConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
+
+  Status Init() override {
+    auto union_type = checked_cast<const UnionType*>(type_.get());
+    mode_ = union_type->mode();
+    type_id_to_child_num_.clear();
+    type_id_to_child_num_.resize(union_type->max_type_code() + 1, -1);
+    int child_i = 0;
+    for (auto type_id : union_type->type_codes()) {
+      type_id_to_child_num_[type_id] = child_i++;
+    }
+    std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
+    for (const auto& field : type_->children()) {
+      std::shared_ptr<Converter> child_converter;
+      RETURN_NOT_OK(GetConverter(field->type(), &child_converter));
+      child_converters_.push_back(child_converter);
+      child_builders.push_back(child_converter->builder());
+    }
+    if (mode_ == UnionMode::DENSE) {
+      builder_ = std::make_shared<DenseUnionBuilder>(default_memory_pool(),
+                                                     std::move(child_builders), type_);
+    } else {
+      builder_ = std::make_shared<SparseUnionBuilder>(default_memory_pool(),
+                                                      std::move(child_builders), type_);
+    }
+    return Status::OK();
+  }
+
+  Status AppendNull() override {
+    for (auto& converter : child_converters_) {
+      RETURN_NOT_OK(converter->AppendNull());
+    }
+    return builder_->AppendNull();
+  }
+
+  // Append a JSON value that is either null or a [type_id, value] pair;
+  // the type_id selects the union child that the value is appended to,
+  // and for sparse unions the remaining children are padded with nulls.
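`Init()` above builds a constant-time dispatch table from type code to child index, since union type codes need not be contiguous. A sketch of what it produces for codes `{4, 8}` (the `AppendValue` override, which continues below, routes every pair through this table):

```cpp
// max_type_code() is 8, so the table has 9 entries; -1 marks unused codes.
//   index:  0   1   2   3   4   5   6   7   8
//   value: -1  -1  -1  -1   0  -1  -1  -1   1
std::vector<int8_t> type_id_to_child_num(union_type->max_type_code() + 1, -1);
int8_t child_i = 0;
for (auto code : union_type->type_codes()) {
  type_id_to_child_num[code] = child_i++;
}
```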
+ Status AppendValue(const rj::Value& json_obj) override { + if (json_obj.IsNull()) { + return AppendNull(); + } + if (!json_obj.IsArray()) { + return JSONTypeError("array", json_obj.GetType()); + } + if (json_obj.Size() != 2) { + return Status::Invalid("Expected [type_id, value] pair, got array of size ", + json_obj.Size()); + } + const auto& id_obj = json_obj[0]; + if (!id_obj.IsInt()) { + return JSONTypeError("int", id_obj.GetType()); + } + + auto id = static_cast(id_obj.GetInt()); + auto child_num = type_id_to_child_num_[id]; + if (child_num == -1) { + return Status::Invalid("type_id ", id, " not found in ", *type_); + } + + auto child_converter = child_converters_[child_num]; + if (mode_ == UnionMode::DENSE) { + RETURN_NOT_OK(checked_cast(*builder_).Append(id)); + } else { + RETURN_NOT_OK(checked_cast(*builder_).Append(id)); + for (auto&& other_converter : child_converters_) { + if (other_converter != child_converter) { + RETURN_NOT_OK(other_converter->AppendNull()); + } + } + } + return child_converter->AppendValue(json_obj[1]); + } + + std::shared_ptr builder() override { return builder_; } + + private: + UnionMode::type mode_; + std::shared_ptr builder_; + std::vector> child_converters_; + std::vector type_id_to_child_num_; +}; + // ------------------------------------------------------------------------ // General conversion functions @@ -644,10 +736,13 @@ Status GetConverter(const std::shared_ptr& type, SIMPLE_CONVERTER_CASE(Type::MAP, MapConverter) SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_LIST, FixedSizeListConverter) SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter) - SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter) - SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter) + SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter) + SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter) + SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter) + SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter) SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter) SIMPLE_CONVERTER_CASE(Type::DECIMAL, DecimalConverter) + SIMPLE_CONVERTER_CASE(Type::UNION, UnionConverter) default: { return Status::NotImplemented("JSON conversion to ", type->ToString(), " not implemented"); diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 4e1a1576ddb..93f859a0a07 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -232,6 +232,9 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data, case flatbuf::Type_Binary: *out = binary(); return Status::OK(); + case flatbuf::Type_LargeBinary: + *out = large_binary(); + return Status::OK(); case flatbuf::Type_FixedSizeBinary: { auto fw_binary = static_cast(type_data); *out = fixed_size_binary(fw_binary->byteWidth()); @@ -240,6 +243,9 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data, case flatbuf::Type_Utf8: *out = utf8(); return Status::OK(); + case flatbuf::Type_LargeUtf8: + *out = large_utf8(); + return Status::OK(); case flatbuf::Type_Bool: *out = boolean(); return Status::OK(); @@ -357,8 +363,12 @@ static Status TypeFromFlatbuffer(const flatbuf::Field* field, const std::vector>& children, const KeyValueMetadata* field_metadata, std::shared_ptr* out) { - RETURN_NOT_OK( - ConcreteTypeFromFlatbuffer(field->type_type(), field->type(), children, out)); + auto type_data = field->type(); + if (type_data == nullptr) { + return Status::IOError( + "Type-pointer in custom metadata of flatbuffer-encoded Field 
is null."); + } + RETURN_NOT_OK(ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children, out)); // Look for extension metadata in custom_metadata field // TODO(wesm): Should this be part of the Field Flatbuffers table? @@ -537,12 +547,24 @@ class FieldToFlatbufferVisitor { return Status::OK(); } + Status Visit(const LargeBinaryType& type) { + fb_type_ = flatbuf::Type_LargeBinary; + type_offset_ = flatbuf::CreateLargeBinary(fbb_).Union(); + return Status::OK(); + } + Status Visit(const StringType& type) { fb_type_ = flatbuf::Type_Utf8; type_offset_ = flatbuf::CreateUtf8(fbb_).Union(); return Status::OK(); } + Status Visit(const LargeStringType& type) { + fb_type_ = flatbuf::Type_LargeUtf8; + type_offset_ = flatbuf::CreateLargeUtf8(fbb_).Union(); + return Status::OK(); + } + Status Visit(const Date32Type& type) { fb_type_ = flatbuf::Type_Date; type_offset_ = flatbuf::CreateDate(fbb_, flatbuf::DateUnit_DAY).Union(); @@ -758,12 +780,22 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, DictionaryMemo* dictiona // based on the DictionaryEncoding metadata and record in the // dictionary_memo std::shared_ptr index_type; - RETURN_NOT_OK(IntFromFlatbuffer(encoding->indexType(), &index_type)); + auto int_data = encoding->indexType(); + if (int_data == nullptr) { + return Status::IOError( + "indexType-pointer in custom metadata of flatbuffer-encoded DictionaryEncoding " + "is null."); + } + RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type)); type = ::arrow::dictionary(index_type, type, encoding->isOrdered()); *out = ::arrow::field(field->name()->str(), type, field->nullable(), metadata); RETURN_NOT_OK(dictionary_memo->AddField(encoding->id(), *out)); } else { - *out = ::arrow::field(field->name()->str(), type, field->nullable(), metadata); + auto name = field->name(); + if (name == nullptr) { + return Status::IOError("Name-pointer of flatbuffer-encoded Field is null."); + } + *out = ::arrow::field(name->str(), type, field->nullable(), metadata); } return Status::OK(); } @@ -1137,7 +1169,12 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr* type } } - return ConcreteTypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, type); + auto type_data = tensor->type(); + if (type_data == nullptr) { + return Status::IOError( + "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null."); + } + return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type); } Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr* type, @@ -1181,8 +1218,12 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr return Status::Invalid("Unrecognized sparse index type"); } - return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), sparse_tensor->type(), {}, - type); + auto type_data = sparse_tensor->type(); + if (type_data == nullptr) { + return Status::IOError( + "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is null."); + } + return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {}, type); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 002379e3779..b9f29d747b4 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -98,7 +98,12 @@ class IpcComponentSource { : metadata_(metadata), file_(file) {} Status GetBuffer(int buffer_index, std::shared_ptr* out) { - const flatbuf::Buffer* buffer = metadata_->buffers()->Get(buffer_index); + auto buffers = 
metadata_->buffers(); + if (buffers == nullptr) { + return Status::IOError( + "Buffers-pointer of flatbuffer-encoded RecordBatch is null."); + } + const flatbuf::Buffer* buffer = buffers->Get(buffer_index); if (buffer->length() == 0) { *out = nullptr; @@ -115,6 +120,9 @@ class IpcComponentSource { Status GetFieldMetadata(int field_index, ArrayData* out) { auto nodes = metadata_->nodes(); + if (nodes == nullptr) { + return Status::IOError("Nodes-pointer of flatbuffer-encoded Table is null."); + } // pop off a field if (field_index >= static_cast(nodes->size())) { return Status::Invalid("Ran out of field metadata, likely malformed"); @@ -241,8 +249,10 @@ class ArrayLoader { } template - typename std::enable_if::value, Status>::type Visit( - const T& type) { + typename std::enable_if::value || + std::is_base_of::value, + Status>::type + Visit(const T& type) { return LoadBinary(); } diff --git a/cpp/src/arrow/ipc/test-common.cc b/cpp/src/arrow/ipc/test-common.cc index 47c307659f0..4cf13ecc059 100644 --- a/cpp/src/arrow/ipc/test-common.cc +++ b/cpp/src/arrow/ipc/test-common.cc @@ -34,6 +34,7 @@ #include "arrow/testing/random.h" #include "arrow/testing/util.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" namespace arrow { @@ -205,18 +206,16 @@ Status MakeRandomStringArray(int64_t length, bool include_nulls, MemoryPool* poo return builder.Finish(out); } -template +template static Status MakeBinaryArrayWithUniqueValues(int64_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr* out) { - Builder builder(pool); + BuilderType builder(pool); for (int64_t i = 0; i < length; ++i) { if (include_nulls && (i % 7 == 0)) { RETURN_NOT_OK(builder.AppendNull()); } else { - const std::string value = std::to_string(i); - RETURN_NOT_OK(builder.Append(reinterpret_cast(value.data()), - static_cast(value.size()))); + RETURN_NOT_OK(builder.Append(std::to_string(i))); } } return builder.Finish(out); @@ -224,28 +223,37 @@ static Status MakeBinaryArrayWithUniqueValues(int64_t length, bool include_nulls Status MakeStringTypesRecordBatch(std::shared_ptr* out, bool with_nulls) { const int64_t length = 500; - auto string_type = utf8(); - auto binary_type = binary(); - auto f0 = field("f0", string_type); - auto f1 = field("f1", binary_type); - auto schema = ::arrow::schema({f0, f1}); + auto f0 = field("strings", utf8()); + auto f1 = field("binaries", binary()); + auto f2 = field("large_strings", large_utf8()); + auto f3 = field("large_binaries", large_binary()); + auto schema = ::arrow::schema({f0, f1, f2, f3}); - std::shared_ptr a0, a1; + std::shared_ptr a0, a1, a2, a3; MemoryPool* pool = default_memory_pool(); // Quirk with RETURN_NOT_OK macro and templated functions { - auto s = MakeBinaryArrayWithUniqueValues(length, with_nulls, - pool, &a0); + auto s = + MakeBinaryArrayWithUniqueValues(length, with_nulls, pool, &a0); RETURN_NOT_OK(s); } - { - auto s = MakeBinaryArrayWithUniqueValues(length, with_nulls, - pool, &a1); + auto s = + MakeBinaryArrayWithUniqueValues(length, with_nulls, pool, &a1); RETURN_NOT_OK(s); } - *out = RecordBatch::Make(schema, length, {a0, a1}); + { + auto s = MakeBinaryArrayWithUniqueValues(length, with_nulls, pool, + &a2); + RETURN_NOT_OK(s); + } + { + auto s = MakeBinaryArrayWithUniqueValues(length, with_nulls, pool, + &a3); + RETURN_NOT_OK(s); + } + *out = RecordBatch::Make(schema, length, {a0, a1, a2, a3}); return Status::OK(); } diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index e1c2ecacba4..ec372074d8b 100644 --- 
a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -225,7 +225,8 @@ class RecordBatchSerializer : public ArrayVisitor { template Status GetZeroBasedValueOffsets(const ArrayType& array, std::shared_ptr* value_offsets) { - // Share slicing logic between ListArray and BinaryArray + // Share slicing logic between ListArray, BinaryArray and LargeBinaryArray + using offset_type = typename ArrayType::offset_type; auto offsets = array.value_offsets(); @@ -235,11 +236,12 @@ class RecordBatchSerializer : public ArrayVisitor { // b) slice the values array accordingly std::shared_ptr shifted_offsets; - RETURN_NOT_OK(AllocateBuffer(pool_, sizeof(int32_t) * (array.length() + 1), + RETURN_NOT_OK(AllocateBuffer(pool_, sizeof(offset_type) * (array.length() + 1), &shifted_offsets)); - int32_t* dest_offsets = reinterpret_cast(shifted_offsets->mutable_data()); - const int32_t start_offset = array.value_offset(0); + offset_type* dest_offsets = + reinterpret_cast(shifted_offsets->mutable_data()); + const offset_type start_offset = array.value_offset(0); for (int i = 0; i < array.length(); ++i) { dest_offsets[i] = array.value_offset(i) - start_offset; @@ -253,9 +255,10 @@ class RecordBatchSerializer : public ArrayVisitor { return Status::OK(); } - Status VisitBinary(const BinaryArray& array) { + template + Status VisitBinary(const ArrayType& array) { std::shared_ptr value_offsets; - RETURN_NOT_OK(GetZeroBasedValueOffsets(array, &value_offsets)); + RETURN_NOT_OK(GetZeroBasedValueOffsets(array, &value_offsets)); auto data = array.value_data(); int64_t total_data_bytes = 0; @@ -343,6 +346,10 @@ class RecordBatchSerializer : public ArrayVisitor { Status Visit(const BinaryArray& array) override { return VisitBinary(array); } + Status Visit(const LargeStringArray& array) override { return VisitBinary(array); } + + Status Visit(const LargeBinaryArray& array) override { return VisitBinary(array); } + Status Visit(const ListArray& array) override { return VisitList(array); } Status Visit(const MapArray& array) override { return VisitList(array); } diff --git a/cpp/src/arrow/json/converter-test.cc b/cpp/src/arrow/json/converter-test.cc index 86e8e8dc84a..cf09e617dec 100644 --- a/cpp/src/arrow/json/converter-test.cc +++ b/cpp/src/arrow/json/converter-test.cc @@ -85,6 +85,11 @@ TEST(ConverterTest, String) { AssertConvert(utf8(), src, src); } +TEST(ConverterTest, LargeString) { + std::string src = R"(["a", "b c", null, "d e f", "g"])"; + AssertConvert(large_utf8(), src, src); +} + TEST(ConverterTest, Timestamp) { std::string src = R"([null, "1970-01-01", "2018-11-13 17:11:10"])"; AssertConvert(timestamp(TimeUnit::SECOND), src, src); diff --git a/cpp/src/arrow/json/converter.cc b/cpp/src/arrow/json/converter.cc index 078e3141869..6b7b730865b 100644 --- a/cpp/src/arrow/json/converter.cc +++ b/cpp/src/arrow/json/converter.cc @@ -264,6 +264,8 @@ Status MakeConverter(const std::shared_ptr& out_type, MemoryPool* pool CONVERTER_CASE(Type::DATE64, DateTimeConverter); CONVERTER_CASE(Type::BINARY, BinaryConverter); CONVERTER_CASE(Type::STRING, BinaryConverter); + CONVERTER_CASE(Type::LARGE_BINARY, BinaryConverter); + CONVERTER_CASE(Type::LARGE_STRING, BinaryConverter); default: return Status::NotImplemented("JSON conversion to ", *out_type, " is not supported"); diff --git a/cpp/src/arrow/json/reader-test.cc b/cpp/src/arrow/json/reader-test.cc index f538ce743de..b6b21ce6868 100644 --- a/cpp/src/arrow/json/reader-test.cc +++ b/cpp/src/arrow/json/reader-test.cc @@ -62,18 +62,13 @@ class ReaderTest : public 
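`GetZeroBasedValueOffsets`, now generic over the offset width, rebases a sliced array's offsets so the serialized values buffer can itself start at zero. A sketch of the core loop under that reading (`RebaseOffsets` is illustrative; `offset_type` is `int32_t` for `BinaryArray` and `int64_t` for `LargeBinaryArray`):

```cpp
#include <cstdint>

// dest must hold array.length() + 1 entries.
template <typename ArrayType>
void RebaseOffsets(const ArrayType& array,
                   typename ArrayType::offset_type* dest) {
  const auto start = array.value_offset(0);
  for (int64_t i = 0; i <= array.length(); ++i) {
    dest[i] = array.value_offset(i) - start;
  }
}
```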
::testing::TestWithParam { SetUpReader(); } - std::shared_ptr ColumnFromJSON(const std::shared_ptr& field, - const std::string& data) { - return std::make_shared(field, ArrayFromJSON(field->type(), data)); - } - - std::shared_ptr ColumnFromJSON(const std::shared_ptr& field, - const std::vector& data) { + std::shared_ptr ChunkedFromJSON(const std::shared_ptr& field, + const std::vector& data) { ArrayVector chunks(data.size()); for (size_t i = 0; i < chunks.size(); ++i) { chunks[i] = ArrayFromJSON(field->type(), data[i]); } - return std::make_shared(field, std::move(chunks)); + return std::make_shared(std::move(chunks)); } ParseOptions parse_options_ = ParseOptions::Defaults(); @@ -99,11 +94,16 @@ TEST_P(ReaderTest, Basics) { SetUpReader(src); ASSERT_OK(reader_->Read(&table_)); - auto expected_table = Table::Make({ - ColumnFromJSON(field("hello", float64()), "[3.5, 3.25, 3.125, 0.0]"), - ColumnFromJSON(field("world", boolean()), "[false, null, null, true]"), - ColumnFromJSON(field("yo", utf8()), "[\"thing\", null, \"\xe5\xbf\x8d\", null]"), - }); + auto schema = ::arrow::schema( + {field("hello", float64()), field("world", boolean()), field("yo", utf8())}); + + auto expected_table = Table::Make( + schema, { + ArrayFromJSON(schema->field(0)->type(), "[3.5, 3.25, 3.125, 0.0]"), + ArrayFromJSON(schema->field(1)->type(), "[false, null, null, true]"), + ArrayFromJSON(schema->field(2)->type(), + "[\"thing\", null, \"\xe5\xbf\x8d\", null]"), + }); AssertTablesEqual(*expected_table, *table_); } @@ -113,14 +113,18 @@ TEST_P(ReaderTest, Nested) { SetUpReader(src); ASSERT_OK(reader_->Read(&table_)); - auto expected_table = Table::Make({ - ColumnFromJSON(field("hello", float64()), "[3.5, 3.25, 3.125, 0.0]"), - ColumnFromJSON(field("world", boolean()), "[false, null, null, true]"), - ColumnFromJSON(field("yo", utf8()), "[\"thing\", null, \"\xe5\xbf\x8d\", null]"), - ColumnFromJSON(field("arr", list(int64())), R"([[1, 2, 3], [2], [], null])"), - ColumnFromJSON(field("nuf", struct_({field("ps", int64())})), - R"([{"ps":null}, null, {"ps":78}, {"ps":90}])"), - }); + auto schema = ::arrow::schema({field("hello", float64()), field("world", boolean()), + field("yo", utf8()), field("arr", list(int64())), + field("nuf", struct_({field("ps", int64())}))}); + + auto a0 = ArrayFromJSON(schema->field(0)->type(), "[3.5, 3.25, 3.125, 0.0]"); + auto a1 = ArrayFromJSON(schema->field(1)->type(), "[false, null, null, true]"); + auto a2 = ArrayFromJSON(schema->field(2)->type(), + "[\"thing\", null, \"\xe5\xbf\x8d\", null]"); + auto a3 = ArrayFromJSON(schema->field(3)->type(), "[[1, 2, 3], [2], [], null]"); + auto a4 = ArrayFromJSON(schema->field(4)->type(), + R"([{"ps":null}, null, {"ps":78}, {"ps":90}])"); + auto expected_table = Table::Make(schema, {a0, a1, a2, a3, a4}); AssertTablesEqual(*expected_table, *table_); } @@ -133,17 +137,25 @@ TEST_P(ReaderTest, PartialSchema) { SetUpReader(src); ASSERT_OK(reader_->Read(&table_)); - auto expected_table = Table::Make({ - // NB: explicitly declared fields will appear first - ColumnFromJSON( - field("nuf", struct_({field("absent", date32()), field("ps", int64())})), - R"([{"absent":null,"ps":null}, null, {"absent":null,"ps":78}, {"absent":null,"ps":90}])"), - ColumnFromJSON(field("arr", list(float32())), R"([[1, 2, 3], [2], [], null])"), - // ...followed by undeclared fields - ColumnFromJSON(field("hello", float64()), "[3.5, 3.25, 3.125, 0.0]"), - ColumnFromJSON(field("world", boolean()), "[false, null, null, true]"), - ColumnFromJSON(field("yo", utf8()), "[\"thing\", null, 
\"\xe5\xbf\x8d\", null]"), - }); + auto schema = ::arrow::schema( + {field("nuf", struct_({field("absent", date32()), field("ps", int64())})), + field("arr", list(float32())), field("hello", float64()), + field("world", boolean()), field("yo", utf8())}); + + auto expected_table = Table::Make( + schema, + { + // NB: explicitly declared fields will appear first + ArrayFromJSON( + schema->field(0)->type(), + R"([{"absent":null,"ps":null}, null, {"absent":null,"ps":78}, {"absent":null,"ps":90}])"), + ArrayFromJSON(schema->field(1)->type(), R"([[1, 2, 3], [2], [], null])"), + // ...followed by undeclared fields + ArrayFromJSON(schema->field(2)->type(), "[3.5, 3.25, 3.125, 0.0]"), + ArrayFromJSON(schema->field(3)->type(), "[false, null, null, true]"), + ArrayFromJSON(schema->field(4)->type(), + "[\"thing\", null, \"\xe5\xbf\x8d\", null]"), + }); AssertTablesEqual(*expected_table, *table_); } @@ -156,14 +168,16 @@ TEST_P(ReaderTest, TypeInference) { )"); ASSERT_OK(reader_->Read(&table_)); - auto expected_table = - Table::Make({ColumnFromJSON(field("ts", timestamp(TimeUnit::SECOND)), - R"([null, "1970-01-01", "2018-11-13 17:11:10"])"), - ColumnFromJSON(field("f", float64()), R"([null, 3, 3.125])")}); + auto schema = + ::arrow::schema({field("ts", timestamp(TimeUnit::SECOND)), field("f", float64())}); + auto expected_table = Table::Make( + schema, {ArrayFromJSON(schema->field(0)->type(), + R"([null, "1970-01-01", "2018-11-13 17:11:10"])"), + ArrayFromJSON(schema->field(1)->type(), R"([null, 3, 3.125])")}); AssertTablesEqual(*expected_table, *table_); } -TEST_P(ReaderTest, MutlipleChunks) { +TEST_P(ReaderTest, MultipleChunks) { parse_options_.unexpected_field_behavior = UnexpectedFieldBehavior::InferType; auto src = scalars_only_src(); @@ -172,15 +186,18 @@ TEST_P(ReaderTest, MutlipleChunks) { SetUpReader(src); ASSERT_OK(reader_->Read(&table_)); + auto schema = ::arrow::schema( + {field("hello", float64()), field("world", boolean()), field("yo", utf8())}); + // there is an empty chunk because the last block of the file is " " - auto expected_table = Table::Make({ - ColumnFromJSON(field("hello", float64()), - {"[3.5]", "[3.25]", "[3.125, 0.0]", "[]"}), - ColumnFromJSON(field("world", boolean()), - {"[false]", "[null]", "[null, true]", "[]"}), - ColumnFromJSON(field("yo", utf8()), - {"[\"thing\"]", "[null]", "[\"\xe5\xbf\x8d\", null]", "[]"}), - }); + auto expected_table = Table::Make( + schema, + { + ChunkedFromJSON(schema->field(0), {"[3.5]", "[3.25]", "[3.125, 0.0]", "[]"}), + ChunkedFromJSON(schema->field(1), {"[false]", "[null]", "[null, true]", "[]"}), + ChunkedFromJSON(schema->field(2), + {"[\"thing\"]", "[null]", "[\"\xe5\xbf\x8d\", null]", "[]"}), + }); AssertTablesEqual(*expected_table, *table_); } @@ -225,7 +242,7 @@ TEST(ReaderTest, MultipleChunksParallel) { ASSERT_EQ(serial->column(0)->type()->id(), Type::INT64); int expected = 0; - for (auto chunk : serial->column(0)->data()->chunks()) { + for (auto chunk : serial->column(0)->chunks()) { for (int64_t i = 0; i < chunk->length(); ++i) { ASSERT_EQ(checked_cast(chunk.get())->GetView(i), expected) << " at index " << i; diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc index 7600ab41f54..cdb230c6c3e 100644 --- a/cpp/src/arrow/pretty_print-test.cc +++ b/cpp/src/arrow/pretty_print-test.cc @@ -155,6 +155,7 @@ TEST_F(TestPrettyPrint, PrimitiveType) { null ])expected"; CheckPrimitive({0, 10}, is_valid, values3, ex3); + CheckPrimitive({0, 10}, is_valid, values3, ex3); static const char* ex3_in2 = R"expected( [ 
"foo", "bar", @@ -163,6 +164,7 @@ TEST_F(TestPrettyPrint, PrimitiveType) { null ])expected"; CheckPrimitive({2, 10}, is_valid, values3, ex3_in2); + CheckPrimitive({2, 10}, is_valid, values3, ex3_in2); } TEST_F(TestPrettyPrint, Int8) { @@ -338,9 +340,11 @@ TEST_F(TestPrettyPrint, BinaryType) { std::vector values = {"foo", "bar", "", "baz", "", "\xff"}; static const char* ex = "[\n 666F6F,\n 626172,\n null,\n 62617A,\n ,\n FF\n]"; CheckPrimitive({0}, is_valid, values, ex); + CheckPrimitive({0}, is_valid, values, ex); static const char* ex_in2 = " [\n 666F6F,\n 626172,\n null,\n 62617A,\n ,\n FF\n ]"; CheckPrimitive({2}, is_valid, values, ex_in2); + CheckPrimitive({2}, is_valid, values, ex_in2); } TEST_F(TestPrettyPrint, ListType) { @@ -567,51 +571,10 @@ TEST_F(TestPrettyPrint, ChunkedArrayPrimitiveType) { CheckStream(chunked_array_2, {0}, expected_2); } -TEST_F(TestPrettyPrint, ColumnPrimitiveType) { - std::shared_ptr int_field = field("column", int32()); - auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]"); - Column column(int_field, ArrayVector({array})); - - static const char* expected = R"expected(column: int32 -[ - [ - 0, - 1, - null, - 3, - null - ] -])expected"; - CheckStream(column, {0}, expected); - - Column column_2(int_field, {array, array}); - - static const char* expected_2 = R"expected(column: int32 -[ - [ - 0, - 1, - null, - 3, - null - ], - [ - 0, - 1, - null, - 3, - null - ] -])expected"; - - CheckStream(column_2, {0}, expected_2); -} - TEST_F(TestPrettyPrint, TablePrimitive) { std::shared_ptr int_field = field("column", int32()); auto array = ArrayFromJSON(int_field->type(), "[0, 1, null, 3, null]"); - std::shared_ptr column = - std::make_shared(int_field, ArrayVector({array})); + auto column = std::make_shared(ArrayVector({array})); std::shared_ptr table_schema = schema({int_field}); std::shared_ptr
table = Table::Make(table_schema, {column}); diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index cb67b0dcf95..5a54e13b889 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -205,7 +205,9 @@ class ArrayPrinter : public PrettyPrinter { // String (Utf8) template - inline typename std::enable_if::value, Status>::type + inline typename std::enable_if::value || + std::is_same::value, + Status>::type WriteDataValues(const T& array) { WriteValues(array, [&](int64_t i) { (*sink_) << "\"" << array.GetView(i) << "\""; }); return Status::OK(); @@ -213,7 +215,9 @@ class ArrayPrinter : public PrettyPrinter { // Binary template - inline typename std::enable_if::value, Status>::type + inline typename std::enable_if::value || + std::is_same::value, + Status>::type WriteDataValues(const T& array) { WriteValues(array, [&](int64_t i) { (*sink_) << HexEncode(array.GetView(i)); }); return Status::OK(); @@ -314,6 +318,7 @@ class ArrayPrinter : public PrettyPrinter { typename std::enable_if::value || std::is_base_of::value || std::is_base_of::value || + std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value, @@ -510,16 +515,6 @@ Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& op return Status::OK(); } -Status PrettyPrint(const Column& column, const PrettyPrintOptions& options, - std::ostream* sink) { - for (int i = 0; i < options.indent; ++i) { - (*sink) << " "; - } - (*sink) << column.field()->ToString() << "\n"; - - return PrettyPrint(*column.data(), options, sink); -} - Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options, std::string* result) { std::ostringstream sink; @@ -552,7 +547,7 @@ Status PrettyPrint(const Table& table, const PrettyPrintOptions& options, (*sink) << " "; } (*sink) << table.schema()->field(i)->name() << ":\n"; - RETURN_NOT_OK(PrettyPrint(*table.column(i)->data(), column_options, sink)); + RETURN_NOT_OK(PrettyPrint(*table.column(i), column_options, sink)); (*sink) << "\n"; } (*sink) << std::flush; diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h index 9c2708f16ee..5740341a67d 100644 --- a/cpp/src/arrow/pretty_print.h +++ b/cpp/src/arrow/pretty_print.h @@ -26,7 +26,6 @@ namespace arrow { class Array; -class Column; class ChunkedArray; class RecordBatch; class Schema; @@ -91,11 +90,6 @@ ARROW_EXPORT Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options, std::string* result); -/// \brief Print human-readable representation of Column -ARROW_EXPORT -Status PrettyPrint(const Column& column, const PrettyPrintOptions& options, - std::ostream* sink); - ARROW_EXPORT Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options, std::ostream* sink); diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc index 59bdb17c896..f4f35acba93 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/cpp/src/arrow/python/arrow_to_pandas.cc @@ -216,7 +216,7 @@ class PandasBlock { virtual ~PandasBlock() {} virtual Status Allocate() = 0; - virtual Status Write(const std::shared_ptr& col, int64_t abs_placement, + virtual Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) = 0; PyObject* block_arr() const { return block_arr_.obj(); } @@ -547,25 +547,21 @@ inline Status ConvertStruct(const PandasOptions& options, const ChunkedArray& da } template -inline Status ConvertListsLike(const PandasOptions& 
options, - const std::shared_ptr& col, +inline Status ConvertListsLike(const PandasOptions& options, const ChunkedArray& data, PyObject** out_values) { - const ChunkedArray& data = *col->data().get(); - const auto& list_type = checked_cast(*col->type()); - // Get column of underlying value arrays std::vector> value_arrays; for (int c = 0; c < data.num_chunks(); c++) { const auto& arr = checked_cast(*data.chunk(c)); value_arrays.emplace_back(arr.values()); } - auto flat_column = std::make_shared(list_type.value_field(), value_arrays); + auto flat_column = std::make_shared(value_arrays); // TODO(ARROW-489): Currently we don't have a Python reference for single columns. // Storing a reference to the whole Array would be to expensive. OwnedRefNoGIL owned_numpy_array; - RETURN_NOT_OK( - ConvertColumnToPandas(options, flat_column, nullptr, owned_numpy_array.ref())); + RETURN_NOT_OK(ConvertChunkedArrayToPandas(options, flat_column, nullptr, + owned_numpy_array.ref())); PyObject* numpy_array = owned_numpy_array.obj(); @@ -709,9 +705,9 @@ static Status ConvertDecimals(const PandasOptions& options, const ChunkedArray& return Status::OK(); } -#define CONVERTLISTSLIKE_CASE(ArrowType, ArrowEnum) \ - case Type::ArrowEnum: \ - RETURN_NOT_OK((ConvertListsLike(options_, col, out_buffer))); \ +#define CONVERTLISTSLIKE_CASE(ArrowType, ArrowEnum) \ + case Type::ArrowEnum: \ + RETURN_NOT_OK((ConvertListsLike(options_, *data, out_buffer))); \ break; class ObjectBlock : public PandasBlock { @@ -719,53 +715,51 @@ class ObjectBlock : public PandasBlock { using PandasBlock::PandasBlock; Status Allocate() override { return AllocateNDArray(NPY_OBJECT); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); + Type::type type = data->type()->id(); PyObject** out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - const ChunkedArray& data = *col->data().get(); - if (type == Type::BOOL) { - RETURN_NOT_OK(ConvertBooleanWithNulls(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertBooleanWithNulls(options_, *data, out_buffer)); } else if (type == Type::UINT8) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::INT8) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::UINT16) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::INT16) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::UINT32) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::INT32) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::UINT64) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, *data, out_buffer)); } else if (type == Type::INT64) { - RETURN_NOT_OK(ConvertIntegerObjects(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertIntegerObjects(options_, 
*data, out_buffer)); } else if (type == Type::BINARY) { - RETURN_NOT_OK(ConvertBinaryLike(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertBinaryLike(options_, *data, out_buffer)); } else if (type == Type::STRING) { - RETURN_NOT_OK(ConvertBinaryLike(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertBinaryLike(options_, *data, out_buffer)); } else if (type == Type::FIXED_SIZE_BINARY) { - RETURN_NOT_OK(ConvertBinaryLike(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertBinaryLike(options_, *data, out_buffer)); } else if (type == Type::DATE32) { - RETURN_NOT_OK(ConvertDates(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertDates(options_, *data, out_buffer)); } else if (type == Type::DATE64) { - RETURN_NOT_OK(ConvertDates(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertDates(options_, *data, out_buffer)); } else if (type == Type::TIME32) { - RETURN_NOT_OK(ConvertTimes(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertTimes(options_, *data, out_buffer)); } else if (type == Type::TIME64) { - RETURN_NOT_OK(ConvertTimes(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertTimes(options_, *data, out_buffer)); } else if (type == Type::DECIMAL) { - RETURN_NOT_OK(ConvertDecimals(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertDecimals(options_, *data, out_buffer)); } else if (type == Type::NA) { - RETURN_NOT_OK(ConvertNulls(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertNulls(options_, *data, out_buffer)); } else if (type == Type::LIST) { - auto list_type = std::static_pointer_cast(col->type()); + auto list_type = std::static_pointer_cast(data->type()); switch (list_type->value_type()->id()) { CONVERTLISTSLIKE_CASE(BooleanType, BOOL) CONVERTLISTSLIKE_CASE(UInt8Type, UINT8) @@ -795,10 +789,10 @@ class ObjectBlock : public PandasBlock { } } } else if (type == Type::STRUCT) { - RETURN_NOT_OK(ConvertStruct(options_, data, out_buffer)); + RETURN_NOT_OK(ConvertStruct(options_, *data, out_buffer)); } else { return Status::NotImplemented("Unsupported type for object array output: ", - col->type()->ToString()); + data->type()->ToString()); } placement_data_[rel_placement] = abs_placement; @@ -814,22 +808,20 @@ class IntBlock : public PandasBlock { return AllocateNDArray(internal::arrow_traits::npy_type); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); + Type::type type = data->type()->id(); C_TYPE* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - const ChunkedArray& data = *col->data().get(); - if (type != ARROW_TYPE) { return Status::NotImplemented("Cannot write Arrow data of type ", - col->type()->ToString(), " to a Pandas int", + data->type()->ToString(), " to a Pandas int", sizeof(C_TYPE), " block"); } - ConvertIntegerNoNullsSameType(options_, data, out_buffer); + ConvertIntegerNoNullsSameType(options_, *data, out_buffer); placement_data_[rel_placement] = abs_placement; return Status::OK(); } @@ -849,20 +841,20 @@ class Float16Block : public PandasBlock { using PandasBlock::PandasBlock; Status Allocate() override { return AllocateNDArray(NPY_FLOAT16); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); + Type::type type = data->type()->id(); if (type != Type::HALF_FLOAT) { return Status::NotImplemented("Cannot write Arrow data of type ", - 
col->type()->ToString(), + data->type()->ToString(), " to a Pandas float16 block"); } npy_half* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - ConvertNumericNullable(*col->data().get(), NPY_HALF_NAN, out_buffer); + ConvertNumericNullable(*data, NPY_HALF_NAN, out_buffer); placement_data_[rel_placement] = abs_placement; return Status::OK(); } @@ -873,19 +865,19 @@ class Float32Block : public PandasBlock { using PandasBlock::PandasBlock; Status Allocate() override { return AllocateNDArray(NPY_FLOAT32); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); + Type::type type = data->type()->id(); if (type != Type::FLOAT) { return Status::NotImplemented("Cannot write Arrow data of type ", - col->type()->ToString(), + data->type()->ToString(), " to a Pandas float32 block"); } float* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - ConvertNumericNullable(*col->data().get(), NAN, out_buffer); + ConvertNumericNullable(*data, NAN, out_buffer); placement_data_[rel_placement] = abs_placement; return Status::OK(); } @@ -896,17 +888,15 @@ class Float64Block : public PandasBlock { using PandasBlock::PandasBlock; Status Allocate() override { return AllocateNDArray(NPY_FLOAT64); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); + Type::type type = data->type()->id(); double* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - const ChunkedArray& data = *col->data().get(); - -#define INTEGER_CASE(IN_TYPE) \ - ConvertIntegerWithNulls(options_, data, out_buffer); \ +#define INTEGER_CASE(IN_TYPE) \ + ConvertIntegerWithNulls(options_, *data, out_buffer); \ break; switch (type) { @@ -927,14 +917,14 @@ class Float64Block : public PandasBlock { case Type::INT64: INTEGER_CASE(int64_t); case Type::FLOAT: - ConvertNumericNullableCast(data, NAN, out_buffer); + ConvertNumericNullableCast(*data, NAN, out_buffer); break; case Type::DOUBLE: - ConvertNumericNullable(data, NAN, out_buffer); + ConvertNumericNullable(*data, NAN, out_buffer); break; default: return Status::NotImplemented("Cannot write Arrow data of type ", - col->type()->ToString(), + data->type()->ToString(), " to a Pandas float64 block"); } @@ -950,20 +940,18 @@ class BoolBlock : public PandasBlock { using PandasBlock::PandasBlock; Status Allocate() override { return AllocateNDArray(NPY_BOOL); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); - - if (type != Type::BOOL) { + if (data->type()->id() != Type::BOOL) { return Status::NotImplemented("Cannot write Arrow data of type ", - col->type()->ToString(), + data->type()->ToString(), " to a Pandas boolean block"); } uint8_t* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - ConvertBooleanNoNulls(options_, *col->data(), out_buffer); + ConvertBooleanNoNulls(options_, *data, out_buffer); placement_data_[rel_placement] = abs_placement; return Status::OK(); } @@ -984,39 +972,37 @@ class DatetimeBlock : public PandasBlock { Status Allocate() override { return AllocateDatetime(2); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status 
Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - Type::type type = col->type()->id(); + Type::type type = data->type()->id(); int64_t* out_buffer = reinterpret_cast(block_data_) + rel_placement * num_rows_; - const ChunkedArray& data = *col->data(); - if (type == Type::DATE32) { // Convert from days since epoch to datetime64[ns] - ConvertDatetimeNanos(data, out_buffer); + ConvertDatetimeNanos(*data, out_buffer); } else if (type == Type::DATE64) { // Date64Type is millisecond timestamp stored as int64_t // TODO(wesm): Do we want to make sure to zero out the milliseconds? - ConvertDatetimeNanos(data, out_buffer); + ConvertDatetimeNanos(*data, out_buffer); } else if (type == Type::TIMESTAMP) { - const auto& ts_type = checked_cast(*col->type()); + const auto& ts_type = checked_cast(*data->type()); if (ts_type.unit() == TimeUnit::NANO) { - ConvertNumericNullable(data, kPandasTimestampNull, out_buffer); + ConvertNumericNullable(*data, kPandasTimestampNull, out_buffer); } else if (ts_type.unit() == TimeUnit::MICRO) { - ConvertDatetimeNanos(data, out_buffer); + ConvertDatetimeNanos(*data, out_buffer); } else if (ts_type.unit() == TimeUnit::MILLI) { - ConvertDatetimeNanos(data, out_buffer); + ConvertDatetimeNanos(*data, out_buffer); } else if (ts_type.unit() == TimeUnit::SECOND) { - ConvertDatetimeNanos(data, out_buffer); + ConvertDatetimeNanos(*data, out_buffer); } else { return Status::NotImplemented("Unsupported time unit"); } } else { return Status::NotImplemented("Cannot write Arrow data of type ", - col->type()->ToString(), + data->type()->ToString(), " to a Pandas datetime block."); } @@ -1070,16 +1056,14 @@ class CategoricalBlock : public PandasBlock { } template - Status WriteIndices(const std::shared_ptr& col) { + Status WriteIndices(const std::shared_ptr& data) { using ArrayType = typename TypeTraits::ArrayType; using TRAITS = internal::arrow_traits; using T = typename TRAITS::T; constexpr int npy_type = TRAITS::npy_type; - const ChunkedArray& data = *col->data().get(); - // Sniff the first chunk - const std::shared_ptr arr_first = data.chunk(0); + const std::shared_ptr arr_first = data->chunk(0); const auto& dict_arr_first = checked_cast(*arr_first); const auto indices_first = std::static_pointer_cast(dict_arr_first.indices()); @@ -1095,7 +1079,7 @@ class CategoricalBlock : public PandasBlock { return Status::OK(); }; - if (!needs_copy_ && data.num_chunks() == 1 && indices_first->null_count() == 0) { + if (!needs_copy_ && data->num_chunks() == 1 && indices_first->null_count() == 0) { RETURN_NOT_OK(CheckIndices(*indices_first, dict_arr_first.dictionary()->length())); RETURN_NOT_OK(AllocateNDArrayFromIndices(npy_type, indices_first)); } else { @@ -1106,7 +1090,7 @@ class CategoricalBlock : public PandasBlock { "allowed"); } - return Status::Invalid("Needed to copy ", data.num_chunks(), " chunks with ", + return Status::Invalid("Needed to copy ", data->num_chunks(), " chunks with ", indices_first->null_count(), " indices nulls, but zero_copy_only was True"); } @@ -1115,8 +1099,8 @@ class CategoricalBlock : public PandasBlock { // No relative placement offset because a single column T* out_values = reinterpret_cast(block_data_); - for (int c = 0; c < data.num_chunks(); c++) { - const std::shared_ptr arr = data.chunk(c); + for (int c = 0; c < data->num_chunks(); c++) { + const std::shared_ptr arr = data->chunk(c); const auto& dict_arr = checked_cast(*arr); const auto& indices = checked_cast(*dict_arr.indices()); @@ -1133,50 +1117,48 @@ class 
CategoricalBlock : public PandasBlock { return Status::OK(); } - Status Write(const std::shared_ptr& col, int64_t abs_placement, + Status Write(const std::shared_ptr& data, int64_t abs_placement, int64_t rel_placement) override { - std::shared_ptr converted_col; + std::shared_ptr converted_data; if (options_.strings_to_categorical && - (col->type()->id() == Type::STRING || col->type()->id() == Type::BINARY)) { + (data->type()->id() == Type::STRING || data->type()->id() == Type::BINARY)) { needs_copy_ = true; compute::FunctionContext ctx(pool_); Datum out; - RETURN_NOT_OK(compute::DictionaryEncode(&ctx, Datum(col->data()), &out)); + RETURN_NOT_OK(compute::DictionaryEncode(&ctx, data, &out)); DCHECK_EQ(out.kind(), Datum::CHUNKED_ARRAY); - converted_col = - std::make_shared(field(col->name(), out.type()), out.chunked_array()); + converted_data = out.chunked_array(); } else { // check if all dictionaries are equal - const ChunkedArray& data = *col->data().get(); - const std::shared_ptr arr_first = data.chunk(0); + const std::shared_ptr arr_first = data->chunk(0); const auto& dict_arr_first = checked_cast(*arr_first); - for (int c = 1; c < data.num_chunks(); c++) { - const std::shared_ptr arr = data.chunk(c); + for (int c = 1; c < data->num_chunks(); c++) { + const std::shared_ptr arr = data->chunk(c); const auto& dict_arr = checked_cast(*arr); if (!(dict_arr_first.dictionary()->Equals(dict_arr.dictionary()))) { return Status::NotImplemented("Variable dictionary type not supported"); } } - converted_col = col; + converted_data = data; } - const auto& dict_type = checked_cast(*converted_col->type()); + const auto& dict_type = checked_cast(*converted_data->type()); switch (dict_type.index_type()->id()) { case Type::INT8: - RETURN_NOT_OK(WriteIndices(converted_col)); + RETURN_NOT_OK(WriteIndices(converted_data)); break; case Type::INT16: - RETURN_NOT_OK(WriteIndices(converted_col)); + RETURN_NOT_OK(WriteIndices(converted_data)); break; case Type::INT32: - RETURN_NOT_OK(WriteIndices(converted_col)); + RETURN_NOT_OK(WriteIndices(converted_data)); break; case Type::INT64: - RETURN_NOT_OK(WriteIndices(converted_col)); + RETURN_NOT_OK(WriteIndices(converted_data)); break; default: { return Status::NotImplemented("Categorical index type not supported: ", @@ -1185,7 +1167,7 @@ class CategoricalBlock : public PandasBlock { } // TODO(wesm): variable dictionaries - auto arr = converted_col->data()->chunk(0); + auto arr = converted_data->chunk(0); const auto& dict_arr = checked_cast(*arr); placement_data_[rel_placement] = abs_placement; @@ -1308,18 +1290,18 @@ Status MakeBlock(const PandasOptions& options, PandasBlock::type type, int64_t n using BlockMap = std::unordered_map>; -static Status GetPandasBlockType(const Column& col, const PandasOptions& options, +static Status GetPandasBlockType(const ChunkedArray& data, const PandasOptions& options, PandasBlock::type* output_type) { #define INTEGER_CASE(NAME) \ *output_type = \ - col.null_count() > 0 \ + data.null_count() > 0 \ ? options.integer_object_nulls ? PandasBlock::OBJECT : PandasBlock::DOUBLE \ : PandasBlock::NAME; \ break; - switch (col.type()->id()) { + switch (data.type()->id()) { case Type::BOOL: - *output_type = col.null_count() > 0 ? PandasBlock::OBJECT : PandasBlock::BOOL; + *output_type = data.null_count() > 0 ? PandasBlock::OBJECT : PandasBlock::BOOL; break; case Type::UINT8: INTEGER_CASE(UINT8); @@ -1365,7 +1347,7 @@ static Status GetPandasBlockType(const Column& col, const PandasOptions& options *output_type = options.date_as_object ? 
PandasBlock::OBJECT : PandasBlock::DATETIME; break; case Type::TIMESTAMP: { - const auto& ts_type = checked_cast(*col.type()); + const auto& ts_type = checked_cast(*data.type()); if (ts_type.timezone() != "") { *output_type = PandasBlock::DATETIME_WITH_TZ; } else { @@ -1373,7 +1355,7 @@ static Status GetPandasBlockType(const Column& col, const PandasOptions& options } } break; case Type::LIST: { - auto list_type = std::static_pointer_cast(col.type()); + auto list_type = std::static_pointer_cast(data.type()); if (!ListTypeSupported(*list_type->value_type())) { return Status::NotImplemented("Not implemented type for list in DataFrameBlock: ", list_type->value_type()->ToString()); @@ -1386,7 +1368,7 @@ static Status GetPandasBlockType(const Column& col, const PandasOptions& options default: return Status::NotImplemented( "No known equivalent Pandas block for Arrow data of type ", - col.type()->ToString(), " is known."); + data.type()->ToString(), " is known."); } return Status::OK(); } @@ -1418,7 +1400,7 @@ class DataFrameBlockCreator { Status CreateBlocks() { for (int i = 0; i < table_->num_columns(); ++i) { - std::shared_ptr col = table_->column(i); + std::shared_ptr col = table_->column(i); PandasBlock::type output_type = PandasBlock::OBJECT; RETURN_NOT_OK(GetPandasBlockType(*col, options_, &output_type)); @@ -1558,14 +1540,14 @@ class DataFrameBlockCreator { class ArrowDeserializer { public: - ArrowDeserializer(const PandasOptions& options, const std::shared_ptr& col, - PyObject* py_ref) - : col_(col), data_(*col->data().get()), options_(options), py_ref_(py_ref) {} + ArrowDeserializer(const PandasOptions& options, + const std::shared_ptr& data, PyObject* py_ref) + : data_(data), options_(options), py_ref_(py_ref) {} Status AllocateOutput(int type) { PyAcquireGIL lock; - result_ = NewArray1DFromType(col_->type().get(), type, col_->length(), nullptr); + result_ = NewArray1DFromType(data_->type().get(), type, data_->length(), nullptr); RETURN_IF_PYERROR(); arr_ = reinterpret_cast(result_); return Status::OK(); @@ -1584,7 +1566,7 @@ class ArrowDeserializer { PyAcquireGIL lock; // Zero-Copy. We can pass the data pointer directly to NumPy. 
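The refactor in these hunks replaces `Column` with plain `ChunkedArray` throughout the conversion layer, and the zero-copy branch only fires when the input is a single chunk with no nulls. A minimal sketch of that eligibility check against the public `ChunkedArray` API, assuming an Arrow C++ build of this era; the helper name `CanZeroCopy` is ours, not Arrow's:

```cpp
#include <iostream>
#include <memory>

#include <arrow/api.h>

// Mirrors the deserializer's guard: a zero-copy handoff to NumPy is only
// safe when the chunked input is one contiguous chunk with no nulls.
bool CanZeroCopy(const arrow::ChunkedArray& data) {
  return data.num_chunks() == 1 && data.null_count() == 0;
}

int main() {
  arrow::Int64Builder builder;
  if (!builder.Append(1).ok() || !builder.Append(2).ok()) return 1;
  std::shared_ptr<arrow::Array> chunk;
  if (!builder.Finish(&chunk).ok()) return 1;

  // Single chunk, no nulls: eligible for the zero-copy path.
  arrow::ChunkedArray chunked(chunk);
  std::cout << std::boolalpha << CanZeroCopy(chunked) << std::endl;
  return 0;
}
```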
- result_ = NewArray1DFromType(col_->type().get(), npy_type, col_->length(), data); + result_ = NewArray1DFromType(data_->type().get(), npy_type, data_->length(), data); arr_ = reinterpret_cast(result_); if (arr_ == nullptr) { @@ -1677,16 +1659,16 @@ class ArrowDeserializer { typedef typename traits::T T; int npy_type = traits::npy_type; - if (data_.num_chunks() == 1 && data_.null_count() == 0) { - return ConvertValuesZeroCopy(options_, npy_type, data_.chunk(0)); + if (data_->num_chunks() == 1 && data_->null_count() == 0) { + return ConvertValuesZeroCopy(options_, npy_type, data_->chunk(0)); } else if (options_.zero_copy_only) { - return Status::Invalid("Needed to copy ", data_.num_chunks(), " chunks with ", - data_.null_count(), " nulls, but zero_copy_only was True"); + return Status::Invalid("Needed to copy ", data_->num_chunks(), " chunks with ", + data_->null_count(), " nulls, but zero_copy_only was True"); } RETURN_NOT_OK(AllocateOutput(npy_type)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); - ConvertNumericNullable(data_, traits::na_value, out_values); + ConvertNumericNullable(*data_, traits::na_value, out_values); return Status::OK(); } @@ -1710,8 +1692,8 @@ class ArrowDeserializer { constexpr T na_value = traits::na_value; constexpr int64_t kShift = traits::npy_shift; - for (int c = 0; c < data_.num_chunks(); c++) { - const auto& arr = *data_.chunk(c); + for (int c = 0; c < data_->num_chunks(); c++) { + const auto& arr = *data_->chunk(c); const c_type* in_values = GetPrimitiveValues(arr); for (int64_t i = 0; i < arr.length(); ++i) { @@ -1743,8 +1725,8 @@ class ArrowDeserializer { constexpr T na_value = traits::na_value; constexpr int64_t kShift = traits::npy_shift; - for (int c = 0; c < data_.num_chunks(); c++) { - const auto& arr = *data_.chunk(c); + for (int c = 0; c < data_->num_chunks(); c++) { + const auto& arr = *data_->chunk(c); const c_type* in_values = GetPrimitiveValues(arr); for (int64_t i = 0; i < arr.length(); ++i) { @@ -1769,25 +1751,25 @@ class ArrowDeserializer { typedef typename traits::T T; - if (data_.num_chunks() == 1 && data_.null_count() == 0) { - return ConvertValuesZeroCopy(options_, traits::npy_type, data_.chunk(0)); + if (data_->num_chunks() == 1 && data_->null_count() == 0) { + return ConvertValuesZeroCopy(options_, traits::npy_type, data_->chunk(0)); } else if (options_.zero_copy_only) { - return Status::Invalid("Needed to copy ", data_.num_chunks(), " chunks with ", - data_.null_count(), " nulls, but zero_copy_only was True"); + return Status::Invalid("Needed to copy ", data_->num_chunks(), " chunks with ", + data_->null_count(), " nulls, but zero_copy_only was True"); } - if (data_.null_count() > 0) { + if (data_->null_count() > 0) { if (options_.integer_object_nulls) { return VisitObjects(ConvertIntegerObjects); } else { RETURN_NOT_OK(AllocateOutput(NPY_FLOAT64)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); - ConvertIntegerWithNulls(options_, data_, out_values); + ConvertIntegerWithNulls(options_, *data_, out_values); } } else { RETURN_NOT_OK(AllocateOutput(traits::npy_type)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); - ConvertIntegerNoNullsSameType(options_, data_, out_values); + ConvertIntegerNoNullsSameType(options_, *data_, out_values); } return Status::OK(); @@ -1800,7 +1782,7 @@ class ArrowDeserializer { } RETURN_NOT_OK(AllocateOutput(NPY_OBJECT)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); - return func(options_, data_, out_values); + return func(options_, *data_, out_values); } // Strings and 
binary @@ -1829,12 +1811,12 @@ class ArrowDeserializer { Status Visit(const BooleanType& type) { if (options_.zero_copy_only) { return Status::Invalid("BooleanType needs copies, but zero_copy_only was True"); - } else if (data_.null_count() > 0) { + } else if (data_->null_count() > 0) { return VisitObjects(ConvertBooleanWithNulls); } else { RETURN_NOT_OK(AllocateOutput(internal::arrow_traits::npy_type)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); - ConvertBooleanNoNulls(options_, data_, out_values); + ConvertBooleanNoNulls(options_, *data_, out_values); } return Status::OK(); } @@ -1845,11 +1827,11 @@ class ArrowDeserializer { } #define CONVERTVALUES_LISTSLIKE_CASE(ArrowType, ArrowEnum) \ case Type::ArrowEnum: \ - return ConvertListsLike(options_, col_, out_values); + return ConvertListsLike(options_, *data_, out_values); RETURN_NOT_OK(AllocateOutput(NPY_OBJECT)); auto out_values = reinterpret_cast(PyArray_DATA(arr_)); - auto list_type = std::static_pointer_cast(col_->type()); + auto list_type = std::static_pointer_cast(data_->type()); switch (list_type->value_type()->id()) { CONVERTVALUES_LISTSLIKE_CASE(UInt8Type, UINT8) CONVERTVALUES_LISTSLIKE_CASE(Int8Type, INT8) @@ -1879,8 +1861,8 @@ class ArrowDeserializer { } Status Visit(const DictionaryType& type) { - auto block = std::make_shared(options_, nullptr, col_->length()); - RETURN_NOT_OK(block->Write(col_, 0, 0)); + auto block = std::make_shared(options_, nullptr, data_->length()); + RETURN_NOT_OK(block->Write(data_, 0, 0)); PyAcquireGIL lock; result_ = PyDict_New(); @@ -1903,14 +1885,13 @@ class ArrowDeserializer { Status Visit(const DataType& type) { return Status::NotImplemented(type.name()); } Status Convert(PyObject** out) { - RETURN_NOT_OK(VisitTypeInline(*col_->type(), this)); + RETURN_NOT_OK(VisitTypeInline(*data_->type(), this)); *out = result_; return Status::OK(); } private: - std::shared_ptr col_; - const ChunkedArray& data_; + std::shared_ptr data_; PandasOptions options_; PyObject* py_ref_; PyArrayObject* arr_; @@ -1920,25 +1901,14 @@ class ArrowDeserializer { Status ConvertArrayToPandas(const PandasOptions& options, const std::shared_ptr& arr, PyObject* py_ref, PyObject** out) { - static std::string dummy_name = "dummy"; - auto field = std::make_shared(dummy_name, arr->type()); - auto col = std::make_shared(field, arr); - return ConvertColumnToPandas(options, col, py_ref, out); + auto carr = std::make_shared(arr); + return ConvertChunkedArrayToPandas(options, carr, py_ref, out); } Status ConvertChunkedArrayToPandas(const PandasOptions& options, const std::shared_ptr& ca, PyObject* py_ref, PyObject** out) { - static std::string dummy_name = "dummy"; - auto field = std::make_shared(dummy_name, ca->type()); - auto col = std::make_shared(field, ca); - return ConvertColumnToPandas(options, col, py_ref, out); -} - -Status ConvertColumnToPandas(const PandasOptions& options, - const std::shared_ptr& col, PyObject* py_ref, - PyObject** out) { - ArrowDeserializer converter(options, col, py_ref); + ArrowDeserializer converter(options, ca, py_ref); return converter.Convert(out); } @@ -1957,16 +1927,14 @@ Status ConvertTableToPandas(const PandasOptions& options, if (!categorical_columns.empty()) { FunctionContext ctx; for (int i = 0; i < table->num_columns(); i++) { - const Column& col = *table->column(i); - if (categorical_columns.count(col.name())) { + std::shared_ptr col = table->column(i); + if (categorical_columns.count(table->field(i)->name())) { Datum out; - RETURN_NOT_OK(DictionaryEncode(&ctx, Datum(col.data()), 
&out)); + RETURN_NOT_OK(DictionaryEncode(&ctx, Datum(col), &out)); std::shared_ptr array = out.chunked_array(); - auto field = std::make_shared( - col.name(), array->type(), col.field()->nullable(), col.field()->metadata()); - auto column = std::make_shared(field, array); + auto field = table->field(i)->WithType(array->type()); RETURN_NOT_OK(current_table->RemoveColumn(i, ¤t_table)); - RETURN_NOT_OK(current_table->AddColumn(i, column, ¤t_table)); + RETURN_NOT_OK(current_table->AddColumn(i, field, array, ¤t_table)); } } } diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc index aa44ec07e65..3cebc03cd22 100644 --- a/cpp/src/arrow/python/common.cc +++ b/cpp/src/arrow/python/common.cc @@ -23,11 +23,15 @@ #include "arrow/memory_pool.h" #include "arrow/status.h" +#include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" #include "arrow/python/helpers.h" namespace arrow { + +using internal::checked_cast; + namespace py { static std::mutex memory_pool_mutex; @@ -47,6 +51,129 @@ MemoryPool* get_memory_pool() { } } +// ---------------------------------------------------------------------- +// PythonErrorDetail + +namespace { + +const char kErrorDetailTypeId[] = "arrow::py::PythonErrorDetail"; + +// Try to match the Python exception type with an appropriate Status code +StatusCode MapPyError(PyObject* exc_type) { + StatusCode code; + + if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) { + code = StatusCode::OutOfMemory; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_IndexError)) { + code = StatusCode::IndexError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) { + code = StatusCode::KeyError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) { + code = StatusCode::TypeError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError) || + PyErr_GivenExceptionMatches(exc_type, PyExc_OverflowError)) { + code = StatusCode::Invalid; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) { + code = StatusCode::IOError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) { + code = StatusCode::NotImplemented; + } else { + code = StatusCode::UnknownError; + } + return code; +} + +// PythonErrorDetail indicates a Python exception was raised. +class PythonErrorDetail : public StatusDetail { + public: + const char* type_id() const override { return kErrorDetailTypeId; } + + std::string ToString() const override { + // This is simple enough not to need the GIL + const auto ty = reinterpret_cast(exc_type_.obj()); + // XXX Should we also print traceback? 
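`PythonErrorDetail` is built on the CPython fetch/normalize/restore calls that appear verbatim in this hunk. The round-trip can be exercised outside Arrow; the embedding program below is our sketch, while the C-API calls are the real ones:

```cpp
#include <Python.h>

#include <cstdio>

int main() {
  Py_Initialize();

  PyErr_SetString(PyExc_ValueError, "boom");

  // Take ownership of the pending exception; this clears the error indicator,
  // which is exactly what lets a Status carry the error across C++ frames.
  PyObject *type, *value, *traceback;
  PyErr_Fetch(&type, &value, &traceback);
  PyErr_NormalizeException(&type, &value, &traceback);
  std::printf("pending after fetch: %d\n", static_cast<int>(PyErr_Occurred() != nullptr));

  // Hand the references back to the interpreter, re-raising the exception.
  PyErr_Restore(type, value, traceback);
  std::printf("pending after restore: %d\n", static_cast<int>(PyErr_Occurred() != nullptr));

  PyErr_Clear();
  Py_Finalize();
  return 0;
}
```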
+ return std::string("Python exception: ") + ty->tp_name; + } + + void RestorePyError() const { + Py_INCREF(exc_type_.obj()); + Py_INCREF(exc_value_.obj()); + Py_INCREF(exc_traceback_.obj()); + PyErr_Restore(exc_type_.obj(), exc_value_.obj(), exc_traceback_.obj()); + } + + PyObject* exc_type() const { return exc_type_.obj(); } + + PyObject* exc_value() const { return exc_value_.obj(); } + + static std::shared_ptr FromPyError() { + PyObject* exc_type = nullptr; + PyObject* exc_value = nullptr; + PyObject* exc_traceback = nullptr; + + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + PyErr_NormalizeException(&exc_type, &exc_value, &exc_traceback); + ARROW_CHECK(exc_type) + << "PythonErrorDetail::FromPyError called without a Python error set"; + DCHECK(PyType_Check(exc_type)); + DCHECK(exc_value); // Ensured by PyErr_NormalizeException, double-check + if (exc_traceback == nullptr) { + // Needed by PyErr_Restore() + Py_INCREF(Py_None); + exc_traceback = Py_None; + } + + std::shared_ptr detail(new PythonErrorDetail); + detail->exc_type_.reset(exc_type); + detail->exc_value_.reset(exc_value); + detail->exc_traceback_.reset(exc_traceback); + return detail; + } + + protected: + PythonErrorDetail() = default; + + OwnedRefNoGIL exc_type_, exc_value_, exc_traceback_; +}; + +} // namespace + +// ---------------------------------------------------------------------- +// Python exception <-> Status + +Status ConvertPyError(StatusCode code) { + auto detail = PythonErrorDetail::FromPyError(); + if (code == StatusCode::UnknownError) { + code = MapPyError(detail->exc_type()); + } + + std::string message; + RETURN_NOT_OK(internal::PyObject_StdStringStr(detail->exc_value(), &message)); + return Status(code, message, detail); +} + +Status PassPyError() { + if (PyErr_Occurred()) { + return ConvertPyError(); + } + return Status::OK(); +} + +bool IsPyError(const Status& status) { + if (status.ok()) { + return false; + } + auto detail = status.detail(); + bool result = detail != nullptr && detail->type_id() == kErrorDetailTypeId; + return result; +} + +void RestorePyError(const Status& status) { + ARROW_CHECK(IsPyError(status)); + const auto& detail = checked_cast(*status.detail()); + detail.RestorePyError(); +} + // ---------------------------------------------------------------------- // PyBuffer @@ -64,7 +191,7 @@ Status PyBuffer::Init(PyObject* obj) { } return Status::OK(); } else { - return Status(StatusCode::PythonError, ""); + return ConvertPyError(StatusCode::Invalid); } } @@ -83,56 +210,5 @@ PyBuffer::~PyBuffer() { } } -// ---------------------------------------------------------------------- -// Python exception -> Status - -Status ConvertPyError(StatusCode code) { - PyObject* exc_type = nullptr; - PyObject* exc_value = nullptr; - PyObject* traceback = nullptr; - - PyErr_Fetch(&exc_type, &exc_value, &traceback); - PyErr_NormalizeException(&exc_type, &exc_value, &traceback); - - DCHECK_NE(exc_type, nullptr) << "ConvertPyError called without an exception set"; - - OwnedRef exc_type_ref(exc_type); - OwnedRef exc_value_ref(exc_value); - OwnedRef traceback_ref(traceback); - - std::string message; - RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message)); - - if (code == StatusCode::UnknownError) { - // Try to match the Python exception type with an appropriate Status code - if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) { - code = StatusCode::OutOfMemory; - } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_IndexError)) { - code = StatusCode::IndexError; - } else if 
(PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) { - code = StatusCode::KeyError; - } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) { - code = StatusCode::TypeError; - } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError) || - PyErr_GivenExceptionMatches(exc_type, PyExc_OverflowError)) { - code = StatusCode::Invalid; - } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) { - code = StatusCode::IOError; - } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) { - code = StatusCode::NotImplemented; - } - } - return Status(code, message); -} - -Status PassPyError() { - if (PyErr_Occurred()) { - // Do not call PyErr_Clear, the assumption is that someone further - // up the call stack will want to deal with the Python error. - return Status(StatusCode::PythonError, ""); - } - return Status::OK(); -} - } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h index 766b76418de..5a157dd8810 100644 --- a/cpp/src/arrow/python/common.h +++ b/cpp/src/arrow/python/common.h @@ -36,7 +36,15 @@ class Result; namespace py { +// Convert current Python error to a Status. The Python error state is cleared +// and can be restored with RestorePyError(). ARROW_PYTHON_EXPORT Status ConvertPyError(StatusCode code = StatusCode::UnknownError); +// Same as ConvertPyError(), but returns Status::OK() if no Python error is set. +ARROW_PYTHON_EXPORT Status PassPyError(); +// Query whether the given Status is a Python error (as wrapped by ConvertPyError()). +ARROW_PYTHON_EXPORT bool IsPyError(const Status& status); +// Restore a Python error wrapped in a Status. +ARROW_PYTHON_EXPORT void RestorePyError(const Status& status); // Catch a pending Python exception and return the corresponding Status. // If no exception is pending, Status::OK() is returned. @@ -48,9 +56,6 @@ inline Status CheckPyError(StatusCode code = StatusCode::UnknownError) { } } -ARROW_PYTHON_EXPORT Status PassPyError(); - -// TODO(wesm): We can just let errors pass through. To be explored later #define RETURN_IF_PYERROR() ARROW_RETURN_NOT_OK(CheckPyError()); #define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE)); @@ -65,6 +70,7 @@ T GetResultValue(Result& result) { } else { int r = internal::check_status(result.status()); assert(r == -1); // should have errored out + ARROW_UNUSED(r); return {}; } } @@ -97,6 +103,18 @@ class ARROW_PYTHON_EXPORT PyAcquireGIL { ARROW_DISALLOW_COPY_AND_ASSIGN(PyAcquireGIL); }; +// A RAII-style helper that releases the GIL until the end of a lexical block +class ARROW_PYTHON_EXPORT PyReleaseGIL { + public: + PyReleaseGIL() { saved_state_ = PyEval_SaveThread(); } + + ~PyReleaseGIL() { PyEval_RestoreThread(saved_state_); } + + private: + PyThreadState* saved_state_; + ARROW_DISALLOW_COPY_AND_ASSIGN(PyReleaseGIL); +}; + // A helper to call safely into the Python interpreter from arbitrary C++ code. // The GIL is acquired, and the current thread's error status is preserved. template @@ -109,7 +127,7 @@ Status SafeCallIntoPython(Function&& func) { Status st = std::forward(func)(); // If the return Status is a "Python error", the current Python error status // describes the error and shouldn't be clobbered. 
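The `SafeCallIntoPython` contract documented here — acquire the GIL, run arbitrary C++ that may call back into Python, and keep an already-pending Python error from being clobbered — can be sketched with plain CPython primitives. `CallIntoPythonPreservingError` is our name, `PyGILState_Ensure` stands in for the RAII wrappers, and the real helper keys off `IsPyError(st)` rather than `PyErr_Occurred()`:

```cpp
#include <Python.h>

template <typename Function>
void CallIntoPythonPreservingError(Function&& func) {
  PyGILState_STATE state = PyGILState_Ensure();  // acquire the GIL

  // Stash any pending Python error so the callee cannot clobber it.
  PyObject *exc_type, *exc_value, *exc_traceback;
  PyErr_Fetch(&exc_type, &exc_value, &exc_traceback);

  func();

  if (PyErr_Occurred()) {
    // The callee raised its own error: drop the saved one.
    Py_XDECREF(exc_type);
    Py_XDECREF(exc_value);
    Py_XDECREF(exc_traceback);
  } else if (exc_type != nullptr) {
    // Otherwise put the original error back in place.
    PyErr_Restore(exc_type, exc_value, exc_traceback);
  }
  PyGILState_Release(state);
}

int main() {
  Py_Initialize();
  CallIntoPythonPreservingError([] { PyRun_SimpleString("x = 1 + 1"); });
  Py_Finalize();
  return 0;
}
```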
- if (!st.IsPythonError() && exc_type != NULLPTR) { + if (!IsPyError(st) && exc_type != NULLPTR) { PyErr_Restore(exc_type, exc_value, exc_traceback); } return st; diff --git a/cpp/src/arrow/python/deserialize.cc b/cpp/src/arrow/python/deserialize.cc index 5e6e1358906..45f7d61890e 100644 --- a/cpp/src/arrow/python/deserialize.cc +++ b/cpp/src/arrow/python/deserialize.cc @@ -235,7 +235,7 @@ Status DeserializeSequence(PyObject* context, const Array& array, int64_t start_ int64_t offset = value_offsets[i]; uint8_t type = type_ids[i]; PyObject* value; - RETURN_NOT_OK(GetValue(context, *data.UnsafeChild(type), offset, + RETURN_NOT_OK(GetValue(context, *data.child(type), offset, python_types[type_ids[i]], base, blobs, &value)); RETURN_NOT_OK(set_item(result.obj(), i - start_idx, value)); } diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index f7068b353be..30e61985027 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -25,8 +25,10 @@ #include #include "arrow/buffer.h" +#include "arrow/sparse_tensor.h" #include "arrow/tensor.h" #include "arrow/type.h" +#include "arrow/util/logging.h" #include "arrow/python/common.h" #include "arrow/python/pyarrow.h" @@ -35,15 +37,6 @@ namespace arrow { namespace py { -bool is_contiguous(PyObject* array) { - if (PyArray_Check(array)) { - return (PyArray_FLAGS(reinterpret_cast(array)) & - (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)) != 0; - } else { - return false; - } -} - NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) { PyAcquireGIL lock; arr_ = ao; @@ -71,6 +64,8 @@ NumPyBuffer::~NumPyBuffer() { *out = FACTORY(); \ break; +namespace { + Status GetTensorType(PyObject* dtype, std::shared_ptr* out) { if (!PyArray_DescrCheck(dtype)) { return Status::TypeError("Did not pass numpy.dtype object"); @@ -125,6 +120,8 @@ Status GetNumPyType(const DataType& type, int* type_num) { return Status::OK(); } +} // namespace + Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr* out) { if (!PyArray_DescrCheck(dtype)) { return Status::TypeError("Did not pass numpy.dtype object"); @@ -186,7 +183,9 @@ Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr* out) { #undef TO_ARROW_TYPE_CASE -Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr* out) { +Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, + const std::vector& dim_names, + std::shared_ptr* out) { if (!PyArray_Check(ao)) { return Status::TypeError("Did not pass ndarray object"); } @@ -197,35 +196,29 @@ Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr* int ndim = PyArray_NDIM(ndarray); - // This is also holding the GIL, so don't already draw it. 
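With the GIL handling hoisted out, `NdarrayToTensor` reduces to copying `PyArray_SHAPE`/`PyArray_STRIDES` into `int64_t` vectors (rejecting negative strides) and constructing the `Tensor` with optional dimension names. The strides bookkeeping itself is plain arithmetic; a standalone, Arrow-free sketch (helper name ours) of how row-major byte strides relate to a shape:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Row-major (C-contiguous) byte strides for a given shape: the innermost
// dimension advances by the item size, each outer dimension by the full
// extent of everything inside it.
std::vector<int64_t> RowMajorStrides(const std::vector<int64_t>& shape,
                                     int64_t item_size) {
  std::vector<int64_t> strides(shape.size());
  int64_t stride = item_size;
  for (size_t i = shape.size(); i-- > 0;) {
    strides[i] = stride;
    stride *= shape[i];
  }
  return strides;
}

int main() {
  // A 2x3 array of int64 (8-byte) values has byte strides {24, 8}.
  for (int64_t s : RowMajorStrides({2, 3}, 8)) std::cout << s << " ";
  std::cout << std::endl;
  return 0;
}
```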
std::shared_ptr data = std::make_shared(ao); std::vector shape(ndim); std::vector strides(ndim); - { - PyAcquireGIL lock; - npy_intp* array_strides = PyArray_STRIDES(ndarray); - npy_intp* array_shape = PyArray_SHAPE(ndarray); - for (int i = 0; i < ndim; ++i) { - if (array_strides[i] < 0) { - return Status::Invalid("Negative ndarray strides not supported"); - } - shape[i] = array_shape[i]; - strides[i] = array_strides[i]; + npy_intp* array_strides = PyArray_STRIDES(ndarray); + npy_intp* array_shape = PyArray_SHAPE(ndarray); + for (int i = 0; i < ndim; ++i) { + if (array_strides[i] < 0) { + return Status::Invalid("Negative ndarray strides not supported"); } - - std::shared_ptr type; - RETURN_NOT_OK( - GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray)), &type)); - *out = std::make_shared(type, data, shape, strides); - return Status::OK(); + shape[i] = array_shape[i]; + strides[i] = array_strides[i]; } + + std::shared_ptr type; + RETURN_NOT_OK( + GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray)), &type)); + *out = std::make_shared(type, data, shape, strides, dim_names); + return Status::OK(); } Status TensorToNdarray(const std::shared_ptr& tensor, PyObject* base, PyObject** out) { - PyAcquireGIL lock; - int type_num; RETURN_NOT_OK(GetNumPyType(*tensor->type(), &type_num)); PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num); @@ -274,5 +267,140 @@ Status TensorToNdarray(const std::shared_ptr& tensor, PyObject* base, return Status::OK(); } +// Wrap the dense data of a sparse tensor in a ndarray +static Status SparseTensorDataToNdarray(const SparseTensor& sparse_tensor, + std::vector data_shape, PyObject* base, + PyObject** out_data) { + int type_num_data; + RETURN_NOT_OK(GetNumPyType(*sparse_tensor.type(), &type_num_data)); + PyArray_Descr* dtype_data = PyArray_DescrNewFromType(type_num_data); + RETURN_IF_PYERROR(); + + const void* immutable_data = sparse_tensor.data()->data(); + // Remove const =( + void* mutable_data = const_cast(immutable_data); + int array_flags = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS; + if (sparse_tensor.is_mutable()) { + array_flags |= NPY_ARRAY_WRITEABLE; + } + + *out_data = PyArray_NewFromDescr(&PyArray_Type, dtype_data, + static_cast(data_shape.size()), data_shape.data(), + nullptr, mutable_data, array_flags, nullptr); + RETURN_IF_PYERROR() + Py_XINCREF(base); + PyArray_SetBaseObject(reinterpret_cast(*out_data), base); + return Status::OK(); +} + +Status SparseTensorCOOToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_coords) { + const auto& sparse_index = arrow::internal::checked_cast( + *sparse_tensor->sparse_index()); + + // Wrap tensor data + OwnedRef result_data; + RETURN_NOT_OK(SparseTensorDataToNdarray( + *sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref())); + + // Wrap indices + PyObject* result_coords; + RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, &result_coords)); + + *out_data = result_data.detach(); + *out_coords = result_coords; + return Status::OK(); +} + +Status SparseTensorCSRToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_indptr, PyObject** out_indices) { + const auto& sparse_index = arrow::internal::checked_cast( + *sparse_tensor->sparse_index()); + + // Wrap tensor data + OwnedRef result_data; + RETURN_NOT_OK(SparseTensorDataToNdarray( + *sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref())); + + // Wrap indices + OwnedRef result_indptr; + OwnedRef 
result_indices; + RETURN_NOT_OK(TensorToNdarray(sparse_index.indptr(), base, result_indptr.ref())); + RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, result_indices.ref())); + + *out_data = result_data.detach(); + *out_indptr = result_indptr.detach(); + *out_indices = result_indices.detach(); + return Status::OK(); +} + +Status NdarraysToSparseTensorCOO(MemoryPool* pool, PyObject* data_ao, PyObject* coords_ao, + const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr* out) { + if (!PyArray_Check(data_ao) || !PyArray_Check(coords_ao)) { + return Status::TypeError("Did not pass ndarray object"); + } + + PyArrayObject* ndarray_data = reinterpret_cast(data_ao); + std::shared_ptr data = std::make_shared(data_ao); + std::shared_ptr type_data; + RETURN_NOT_OK(GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray_data)), + &type_data)); + + std::shared_ptr coords; + RETURN_NOT_OK(NdarrayToTensor(pool, coords_ao, {}, &coords)); + ARROW_CHECK_EQ(coords->type_id(), Type::INT64); // Should be ensured by caller + + std::shared_ptr sparse_index = std::make_shared( + std::static_pointer_cast>(coords)); + *out = std::make_shared>(sparse_index, type_data, data, + shape, dim_names); + return Status::OK(); +} + +Status NdarraysToSparseTensorCSR(MemoryPool* pool, PyObject* data_ao, PyObject* indptr_ao, + PyObject* indices_ao, const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr* out) { + if (!PyArray_Check(data_ao) || !PyArray_Check(indptr_ao) || + !PyArray_Check(indices_ao)) { + return Status::TypeError("Did not pass ndarray object"); + } + + PyArrayObject* ndarray_data = reinterpret_cast(data_ao); + std::shared_ptr data = std::make_shared(data_ao); + std::shared_ptr type_data; + RETURN_NOT_OK(GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray_data)), + &type_data)); + + std::shared_ptr indptr, indices; + RETURN_NOT_OK(NdarrayToTensor(pool, indptr_ao, {}, &indptr)); + RETURN_NOT_OK(NdarrayToTensor(pool, indices_ao, {}, &indices)); + ARROW_CHECK_EQ(indptr->type_id(), Type::INT64); // Should be ensured by caller + ARROW_CHECK_EQ(indices->type_id(), Type::INT64); // Should be ensured by caller + + auto sparse_index = std::make_shared( + std::static_pointer_cast>(indptr), + std::static_pointer_cast>(indices)); + *out = std::make_shared>(sparse_index, type_data, data, + shape, dim_names); + return Status::OK(); +} + +Status TensorToSparseTensorCOO(const std::shared_ptr& tensor, + std::shared_ptr* out) { + *out = std::make_shared(*tensor); + return Status::OK(); +} + +Status TensorToSparseTensorCSR(const std::shared_ptr& tensor, + std::shared_ptr* out) { + *out = std::make_shared(*tensor); + return Status::OK(); +} + } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h index dce5fe522d6..85ef36b1bc5 100644 --- a/cpp/src/arrow/python/numpy_convert.h +++ b/cpp/src/arrow/python/numpy_convert.h @@ -25,9 +25,11 @@ #include #include +#include #include "arrow/buffer.h" #include "arrow/python/visibility.h" +#include "arrow/sparse_tensor.h" namespace arrow { @@ -47,27 +49,44 @@ class ARROW_PYTHON_EXPORT NumPyBuffer : public Buffer { PyObject* arr_; }; -// Handle misbehaved types like LONGLONG and ULONGLONG -ARROW_PYTHON_EXPORT -int cast_npy_type_compat(int type_num); - -ARROW_PYTHON_EXPORT -bool is_contiguous(PyObject* array); - ARROW_PYTHON_EXPORT Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr* out); ARROW_PYTHON_EXPORT Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr* 
out); -Status GetTensorType(PyObject* dtype, std::shared_ptr* out); -Status GetNumPyType(const DataType& type, int* type_num); - ARROW_PYTHON_EXPORT Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, + const std::vector& dim_names, std::shared_ptr* out); ARROW_PYTHON_EXPORT Status TensorToNdarray(const std::shared_ptr& tensor, PyObject* base, PyObject** out); +ARROW_PYTHON_EXPORT Status +SparseTensorCOOToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, PyObject** out_coords); + +ARROW_PYTHON_EXPORT Status SparseTensorCSRToNdarray( + const std::shared_ptr& sparse_tensor, PyObject* base, + PyObject** out_data, PyObject** out_indptr, PyObject** out_indices); + +ARROW_PYTHON_EXPORT Status NdarraysToSparseTensorCOO( + MemoryPool* pool, PyObject* data_ao, PyObject* coords_ao, + const std::vector& shape, const std::vector& dim_names, + std::shared_ptr* out); + +ARROW_PYTHON_EXPORT Status NdarraysToSparseTensorCSR( + MemoryPool* pool, PyObject* data_ao, PyObject* indptr_ao, PyObject* indices_ao, + const std::vector& shape, const std::vector& dim_names, + std::shared_ptr* out); + +ARROW_PYTHON_EXPORT Status +TensorToSparseTensorCOO(const std::shared_ptr& tensor, + std::shared_ptr* csparse_tensor); + +ARROW_PYTHON_EXPORT Status +TensorToSparseTensorCSR(const std::shared_ptr& tensor, + std::shared_ptr* csparse_tensor); + } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc index b353a1e02cf..811a31f5db8 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/cpp/src/arrow/python/numpy_to_arrow.cc @@ -800,6 +800,9 @@ Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pa if (!PyArray_Check(ao)) { return Status::Invalid("Input object was not a NumPy array"); } + if (PyArray_NDIM(reinterpret_cast(ao)) != 1) { + return Status::Invalid("only handle 1-dimensional arrays"); + } NumPyConverter converter(pool, ao, mo, type, from_pandas, cast_options); RETURN_NOT_OK(converter.Convert()); diff --git a/cpp/src/arrow/python/pyarrow.cc b/cpp/src/arrow/python/pyarrow.cc index 1cedc549cfa..38fd56d80fc 100644 --- a/cpp/src/arrow/python/pyarrow.cc +++ b/cpp/src/arrow/python/pyarrow.cc @@ -123,19 +123,42 @@ PyObject* wrap_tensor(const std::shared_ptr& tensor) { return ::pyarrow_wrap_tensor(tensor); } -bool is_column(PyObject* column) { return ::pyarrow_is_column(column) != 0; } +bool is_sparse_tensor_csr(PyObject* sparse_tensor) { + return ::pyarrow_is_sparse_tensor_csr(sparse_tensor) != 0; +} + +Status unwrap_sparse_tensor_csr(PyObject* sparse_tensor, + std::shared_ptr* out) { + *out = ::pyarrow_unwrap_sparse_tensor_csr(sparse_tensor); + if (*out) { + return Status::OK(); + } else { + return Status::Invalid( + "Could not unwrap SparseTensorCSR from the passed Python object."); + } +} + +PyObject* wrap_sparse_tensor_csr(const std::shared_ptr& sparse_tensor) { + return ::pyarrow_wrap_sparse_tensor_csr(sparse_tensor); +} + +bool is_sparse_tensor_coo(PyObject* sparse_tensor) { + return ::pyarrow_is_sparse_tensor_coo(sparse_tensor) != 0; +} -Status unwrap_column(PyObject* column, std::shared_ptr* out) { - *out = ::pyarrow_unwrap_column(column); +Status unwrap_sparse_tensor_coo(PyObject* sparse_tensor, + std::shared_ptr* out) { + *out = ::pyarrow_unwrap_sparse_tensor_coo(sparse_tensor); if (*out) { return Status::OK(); } else { - return Status::Invalid("Could not unwrap Column from the passed Python object."); + return Status::Invalid( + "Could not unwrap SparseTensorCOO from the 
passed Python object."); } } -PyObject* wrap_column(const std::shared_ptr& column) { - return ::pyarrow_wrap_column(column); +PyObject* wrap_sparse_tensor_coo(const std::shared_ptr& sparse_tensor) { + return ::pyarrow_wrap_sparse_tensor_coo(sparse_tensor); } bool is_table(PyObject* table) { return ::pyarrow_is_table(table) != 0; } diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h index ff5bf8f01dd..aad7a4a5dd9 100644 --- a/cpp/src/arrow/python/pyarrow.h +++ b/cpp/src/arrow/python/pyarrow.h @@ -24,11 +24,12 @@ #include "arrow/python/visibility.h" +#include "arrow/sparse_tensor.h" + namespace arrow { class Array; class Buffer; -class Column; class DataType; class Field; class RecordBatch; @@ -67,9 +68,17 @@ ARROW_PYTHON_EXPORT bool is_tensor(PyObject* tensor); ARROW_PYTHON_EXPORT Status unwrap_tensor(PyObject* tensor, std::shared_ptr* out); ARROW_PYTHON_EXPORT PyObject* wrap_tensor(const std::shared_ptr& tensor); -ARROW_PYTHON_EXPORT bool is_column(PyObject* column); -ARROW_PYTHON_EXPORT Status unwrap_column(PyObject* column, std::shared_ptr* out); -ARROW_PYTHON_EXPORT PyObject* wrap_column(const std::shared_ptr& column); +ARROW_PYTHON_EXPORT bool is_sparse_tensor_coo(PyObject* sparse_tensor); +ARROW_PYTHON_EXPORT Status +unwrap_sparse_tensor_coo(PyObject* sparse_tensor, std::shared_ptr* out); +ARROW_PYTHON_EXPORT PyObject* wrap_sparse_tensor_coo( + const std::shared_ptr& sparse_tensor); + +ARROW_PYTHON_EXPORT bool is_sparse_tensor_csr(PyObject* sparse_tensor); +ARROW_PYTHON_EXPORT Status +unwrap_sparse_tensor_csr(PyObject* sparse_tensor, std::shared_ptr* out); +ARROW_PYTHON_EXPORT PyObject* wrap_sparse_tensor_csr( + const std::shared_ptr& sparse_tensor); ARROW_PYTHON_EXPORT bool is_table(PyObject* table); ARROW_PYTHON_EXPORT Status unwrap_table(PyObject* table, std::shared_ptr
* out); diff --git a/cpp/src/arrow/python/pyarrow_api.h b/cpp/src/arrow/python/pyarrow_api.h index b76e9614a8a..76e72812361 100644 --- a/cpp/src/arrow/python/pyarrow_api.h +++ b/cpp/src/arrow/python/pyarrow_api.h @@ -36,8 +36,6 @@ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr #define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer> const &) = 0; #define pyarrow_wrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer -static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column)(std::shared_ptr< arrow::Column> const &) = 0; -#define pyarrow_wrap_column __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type)(std::shared_ptr< arrow::DataType> const &) = 0; #define pyarrow_wrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field)(std::shared_ptr< arrow::Field> const &) = 0; @@ -50,14 +48,16 @@ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table)(std::shared_ptr #define pyarrow_wrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor)(std::shared_ptr< arrow::Tensor> const &) = 0; #define pyarrow_wrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor +static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_csr)(std::shared_ptr< arrow::SparseTensorCSR> const &) = 0; +#define pyarrow_wrap_sparse_tensor_csr __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_csr +static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_coo)(std::shared_ptr< arrow::SparseTensorCOO> const &) = 0; +#define pyarrow_wrap_sparse_tensor_coo __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_coo static std::shared_ptr< arrow::Array> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array)(PyObject *) = 0; #define pyarrow_unwrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array static std::shared_ptr< arrow::RecordBatch> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch)(PyObject *) = 0; #define pyarrow_unwrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch static std::shared_ptr< arrow::Buffer> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer)(PyObject *) = 0; #define pyarrow_unwrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer -static std::shared_ptr< arrow::Column> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_column)(PyObject *) = 0; -#define pyarrow_unwrap_column __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_column static std::shared_ptr< arrow::DataType> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type)(PyObject *) = 0; #define pyarrow_unwrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type static std::shared_ptr< arrow::Field> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field)(PyObject *) = 0; @@ -68,6 +68,10 @@ static std::shared_ptr< arrow::Table> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwra #define pyarrow_unwrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table static std::shared_ptr< arrow::Tensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor)(PyObject *) = 0; #define pyarrow_unwrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor +static std::shared_ptr< arrow::SparseTensorCSR> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_csr)(PyObject *) = 0; +#define pyarrow_unwrap_sparse_tensor_csr __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_csr +static std::shared_ptr< 
arrow::SparseTensorCOO> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_coo)(PyObject *) = 0; +#define pyarrow_unwrap_sparse_tensor_coo __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_coo static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status)(arrow::Status const &) = 0; #define pyarrow_internal_check_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer)(PyObject *) = 0; @@ -84,8 +88,10 @@ static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar)(std::shared_pt #define pyarrow_wrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor)(PyObject *) = 0; #define pyarrow_is_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor -static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_column)(PyObject *) = 0; -#define pyarrow_is_column __pyx_api_f_7pyarrow_3lib_pyarrow_is_column +static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_tensor_csr)(PyObject *) = 0; +#define pyarrow_is_sparse_tensor_csr __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_tensor_csr +static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_tensor_coo)(PyObject *) = 0; +#define pyarrow_is_sparse_tensor_coo __pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_tensor_coo static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_table)(PyObject *) = 0; #define pyarrow_is_table __pyx_api_f_7pyarrow_3lib_pyarrow_is_table static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch)(PyObject *) = 0; @@ -160,22 +166,24 @@ static int import_pyarrow__lib(void) { if (__Pyx_ImportFunction(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer> const &)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "pyarrow_wrap_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column, "PyObject *(std::shared_ptr< arrow::Column> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_resizable_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer, "PyObject *(std::shared_ptr< arrow::ResizableBuffer> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor> const &)") < 0) goto bad; + if 
(__Pyx_ImportFunction(module, "pyarrow_wrap_sparse_tensor_csr", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_csr, "PyObject *(std::shared_ptr< arrow::SparseTensorCSR> const &)") < 0) goto bad; + if (__Pyx_ImportFunction(module, "pyarrow_wrap_sparse_tensor_coo", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_coo, "PyObject *(std::shared_ptr< arrow::SparseTensorCOO> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_array, "std::shared_ptr< arrow::Array> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_batch, "std::shared_ptr< arrow::RecordBatch> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_buffer, "std::shared_ptr< arrow::Buffer> (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "pyarrow_unwrap_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_column, "std::shared_ptr< arrow::Column> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_data_type, "std::shared_ptr< arrow::DataType> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_field, "std::shared_ptr< arrow::Field> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction(module, "pyarrow_unwrap_sparse_tensor_csr", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_csr, "std::shared_ptr< arrow::SparseTensorCSR> (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction(module, "pyarrow_unwrap_sparse_tensor_coo", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_coo, "std::shared_ptr< arrow::SparseTensorCOO> (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_is_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)") < 0) goto bad; @@ -184,7 +192,8 @@ static int import_pyarrow__lib(void) { if (__Pyx_ImportFunction(module, "pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar> const &)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_is_tensor", (void 
(**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)") < 0) goto bad; - if (__Pyx_ImportFunction(module, "pyarrow_is_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_column, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction(module, "pyarrow_is_sparse_tensor_csr", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_tensor_csr, "int (PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction(module, "pyarrow_is_sparse_tensor_coo", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_sparse_tensor_coo, "int (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)") < 0) goto bad; if (__Pyx_ImportFunction(module, "pyarrow_is_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_batch, "int (PyObject *)") < 0) goto bad; Py_DECREF(module); module = 0; diff --git a/cpp/src/arrow/python/pyarrow_lib.h b/cpp/src/arrow/python/pyarrow_lib.h index 5f5fc4c6b6f..a4bc1039ee8 100644 --- a/cpp/src/arrow/python/pyarrow_lib.h +++ b/cpp/src/arrow/python/pyarrow_lib.h @@ -48,6 +48,8 @@ __PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_resizable_buffer(std __PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_schema(std::shared_ptr< arrow::Schema> const &); __PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_table(std::shared_ptr< arrow::Table> const &); __PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_tensor(std::shared_ptr< arrow::Tensor> const &); +__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_coo(std::shared_ptr< arrow::SparseTensorCOO> const &); +__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_sparse_tensor_csr(std::shared_ptr< arrow::SparseTensorCSR> const &); __PYX_EXTERN_C std::shared_ptr< arrow::Array> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_array(PyObject *); __PYX_EXTERN_C std::shared_ptr< arrow::RecordBatch> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_batch(PyObject *); __PYX_EXTERN_C std::shared_ptr< arrow::Buffer> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_buffer(PyObject *); @@ -57,6 +59,8 @@ __PYX_EXTERN_C std::shared_ptr< arrow::Field> __pyx_f_7pyarrow_3lib_pyarrow_unw __PYX_EXTERN_C std::shared_ptr< arrow::Schema> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_schema(PyObject *); __PYX_EXTERN_C std::shared_ptr< arrow::Table> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_table(PyObject *); __PYX_EXTERN_C std::shared_ptr< arrow::Tensor> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_tensor(PyObject *); +__PYX_EXTERN_C std::shared_ptr< arrow::SparseTensorCOO> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_coo(PyObject *); +__PYX_EXTERN_C std::shared_ptr< arrow::SparseTensorCSR> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_sparse_tensor_csr(PyObject *); #endif /* !__PYX_HAVE_API__pyarrow__lib */ diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index 5de613f0e50..5027d3fe3f6 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -40,21 +40,12 @@ using internal::checked_cast; namespace py { -TEST(PyBuffer, InvalidInputObject) { - std::shared_ptr res; - PyObject* input = Py_None; - auto old_refcnt = Py_REFCNT(input); - ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res)); - PyErr_Clear(); - ASSERT_EQ(old_refcnt, Py_REFCNT(input)); -} - TEST(OwnedRef, TestMoves) { - PyAcquireGIL lock; std::vector vec; PyObject *u, *v; u = PyList_New(0); v = PyList_New(0); + { OwnedRef ref(u); vec.push_back(std::move(ref)); @@ -66,31 
+57,42 @@ TEST(OwnedRef, TestMoves) { } TEST(OwnedRefNoGIL, TestMoves) { - std::vector vec; - PyObject *u, *v; - { - PyAcquireGIL lock; - u = PyList_New(0); - v = PyList_New(0); - } + PyAcquireGIL lock; + lock.release(); + { - OwnedRefNoGIL ref(u); - vec.push_back(std::move(ref)); - ASSERT_EQ(ref.obj(), nullptr); + std::vector vec; + PyObject *u, *v; + { + lock.acquire(); + u = PyList_New(0); + v = PyList_New(0); + lock.release(); + } + { + OwnedRefNoGIL ref(u); + vec.push_back(std::move(ref)); + ASSERT_EQ(ref.obj(), nullptr); + } + vec.emplace_back(v); + ASSERT_EQ(Py_REFCNT(u), 1); + ASSERT_EQ(Py_REFCNT(v), 1); } - vec.emplace_back(v); - ASSERT_EQ(Py_REFCNT(u), 1); - ASSERT_EQ(Py_REFCNT(v), 1); } TEST(CheckPyError, TestStatus) { - PyAcquireGIL lock; Status st; - auto check_error = [](Status& st, const char* expected_message = "some error") { + auto check_error = [](Status& st, const char* expected_message = "some error", + const char* expected_detail = nullptr) { st = CheckPyError(); ASSERT_EQ(st.message(), expected_message); ASSERT_FALSE(PyErr_Occurred()); + if (expected_detail) { + auto detail = st.detail(); + ASSERT_NE(detail, nullptr); + ASSERT_EQ(detail->ToString(), expected_detail); + } }; for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) { @@ -100,7 +102,7 @@ TEST(CheckPyError, TestStatus) { } PyErr_SetString(PyExc_TypeError, "some error"); - check_error(st); + check_error(st, "some error", "Python exception: TypeError"); ASSERT_TRUE(st.IsTypeError()); PyErr_SetString(PyExc_ValueError, "some error"); @@ -118,7 +120,7 @@ TEST(CheckPyError, TestStatus) { } PyErr_SetString(PyExc_NotImplementedError, "some error"); - check_error(st); + check_error(st, "some error", "Python exception: NotImplementedError"); ASSERT_TRUE(st.IsNotImplemented()); // No override if a specific status code is given @@ -129,6 +131,52 @@ TEST(CheckPyError, TestStatus) { ASSERT_FALSE(PyErr_Occurred()); } +TEST(CheckPyError, TestStatusNoGIL) { + PyAcquireGIL lock; + { + Status st; + PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); + st = ConvertPyError(); + ASSERT_FALSE(PyErr_Occurred()); + lock.release(); + ASSERT_TRUE(st.IsUnknownError()); + ASSERT_EQ(st.message(), "zzzt"); + ASSERT_EQ(st.detail()->ToString(), "Python exception: ZeroDivisionError"); + } +} + +TEST(RestorePyError, Basics) { + PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); + auto st = ConvertPyError(); + ASSERT_FALSE(PyErr_Occurred()); + ASSERT_TRUE(st.IsUnknownError()); + ASSERT_EQ(st.message(), "zzzt"); + ASSERT_EQ(st.detail()->ToString(), "Python exception: ZeroDivisionError"); + + RestorePyError(st); + ASSERT_TRUE(PyErr_Occurred()); + PyObject* exc_type; + PyObject* exc_value; + PyObject* exc_traceback; + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + ASSERT_TRUE(PyErr_GivenExceptionMatches(exc_type, PyExc_ZeroDivisionError)); + std::string py_message; + ASSERT_OK(internal::PyObject_StdStringStr(exc_value, &py_message)); + ASSERT_EQ(py_message, "zzzt"); +} + +TEST(PyBuffer, InvalidInputObject) { + std::shared_ptr res; + PyObject* input = Py_None; + auto old_refcnt = Py_REFCNT(input); + { + Status st = PyBuffer::FromPyObject(input, &res); + ASSERT_TRUE(IsPyError(st)) << st.ToString(); + ASSERT_FALSE(PyErr_Occurred()); + } + ASSERT_EQ(old_refcnt, Py_REFCNT(input)); +} + class DecimalTest : public ::testing::Test { public: DecimalTest() : lock_(), decimal_constructor_() { @@ -253,8 +301,6 @@ TEST(PandasConversionTest, TestObjectBlockWriteFails) { } TEST(BuiltinConversionTest, TestMixedTypeFails) { - PyAcquireGIL lock; - 
OwnedRef list_ref(PyList_New(3)); PyObject* list = list_ref.obj(); @@ -405,8 +451,6 @@ TEST_F(DecimalTest, TestMixedPrecisionAndScale) { } TEST_F(DecimalTest, TestMixedPrecisionAndScaleSequenceConvert) { - PyAcquireGIL lock; - PyObject* value1 = this->CreatePythonDecimal("0.01").detach(); ASSERT_NE(value1, nullptr); diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc index 8ff0e01480f..bc64fb7b55a 100644 --- a/cpp/src/arrow/python/serialize.cc +++ b/cpp/src/arrow/python/serialize.cc @@ -77,14 +77,6 @@ class SequenceBuilder { // Appending a none to the sequence Status AppendNone() { return builder_->AppendNull(); } - template - Status Update(BuilderType* child_builder, int8_t tag) { - int32_t offset32 = -1; - RETURN_NOT_OK(internal::CastSize(child_builder->length(), &offset32)); - DCHECK_GE(offset32, 0); - return builder_->Append(tag, offset32); - } - template Status CreateAndUpdate(std::shared_ptr* child_builder, int8_t tag, MakeBuilderFn make_builder) { @@ -95,7 +87,7 @@ class SequenceBuilder { convert << static_cast(tag); type_map_[tag] = builder_->AppendChild(*child_builder, convert.str()); } - return Update(child_builder->get(), type_map_[tag]); + return builder_->Append(type_map_[tag]); } template @@ -332,8 +324,8 @@ Status SequenceBuilder::AppendDict(PyObject* context, PyObject* dict, Status CallCustomCallback(PyObject* context, PyObject* method_name, PyObject* elem, PyObject** result) { - *result = NULL; if (context == Py_None) { + *result = NULL; return Status::SerializationError("error while calling callback on ", internal::PyObject_StdStringRepr(elem), ": handler not registered"); @@ -515,7 +507,7 @@ Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder* bui builder->AppendNdarray(static_cast(blobs_out->ndarrays.size()))); std::shared_ptr tensor; RETURN_NOT_OK(NdarrayToTensor(default_memory_pool(), - reinterpret_cast(array), &tensor)); + reinterpret_cast(array), {}, &tensor)); blobs_out->ndarrays.push_back(tensor); } break; default: { diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 1f266df4a6a..f83a6cd2771 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -18,6 +18,7 @@ #include "arrow/record_batch.h" #include +#include #include #include #include @@ -27,6 +28,7 @@ #include "arrow/status.h" #include "arrow/table.h" #include "arrow/type.h" +#include "arrow/util/atomic_shared_ptr.h" #include "arrow/util/logging.h" #include "arrow/util/stl.h" @@ -85,11 +87,12 @@ class SimpleRecordBatch : public RecordBatch { } std::shared_ptr column(int i) const override { - if (!boxed_columns_[i]) { - boxed_columns_[i] = MakeArray(columns_[i]); + std::shared_ptr result = internal::atomic_load(&boxed_columns_[i]); + if (!result) { + result = MakeArray(columns_[i]); + internal::atomic_store(&boxed_columns_[i], result); } - DCHECK(boxed_columns_[i]); - return boxed_columns_[i]; + return result; } std::shared_ptr column_data(int i) const override { return columns_[i]; } diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h index 36e6ef79ab2..0bbb0450dbd 100644 --- a/cpp/src/arrow/result.h +++ b/cpp/src/arrow/result.h @@ -333,12 +333,4 @@ class Result { #define ARROW_ASSIGN_OR_RAISE(lhs, rexpr) \ ARROW_ASSIGN_OR_RAISE_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \ lhs, rexpr); - -namespace internal { - -// For Cython, because of https://github.com/cython/cython/issues/3018 -template -using CResult = Result; - -} // namespace internal } // namespace arrow diff 
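A minimal usage sketch for Result<T> with ARROW_ASSIGN_OR_RAISE as declared above (Divide and UseDivide are illustrative names, not part of the patch):

#include "arrow/result.h"
#include "arrow/status.h"

arrow::Result<int> Divide(int num, int denom) {
  if (denom == 0) return arrow::Status::Invalid("division by zero");
  return num / denom;  // a plain value converts implicitly to Result<int>
}

arrow::Status UseDivide() {
  // On error, the macro returns the failed Status from the enclosing function;
  // on success it unwraps the value into the declared lhs.
  ARROW_ASSIGN_OR_RAISE(int quotient, Divide(42, 6));
  return quotient == 7 ? arrow::Status::OK()
                       : arrow::Status::UnknownError("unexpected quotient");
}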
--git a/cpp/src/arrow/scalar.h index 4f0589a2f5c..76aecd01fd3 100644 --- a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -91,20 +91,22 @@ struct NumericScalar : public internal::PrimitiveScalar { : internal::PrimitiveScalar{type, is_valid}, value(value) {} }; -struct ARROW_EXPORT BinaryScalar : public Scalar { +template <typename T> +struct BaseBinaryScalar : public Scalar { std::shared_ptr<Buffer> value; - explicit BinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true) - : BinaryScalar(value, binary(), is_valid) {} protected: - BinaryScalar(const std::shared_ptr<Buffer>& value, - const std::shared_ptr<DataType>& type, bool is_valid = true) + BaseBinaryScalar(const std::shared_ptr<Buffer>& value, + const std::shared_ptr<DataType>& type, bool is_valid = true) : Scalar{type, is_valid}, value(value) {} }; -struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar { - FixedSizeBinaryScalar(const std::shared_ptr<Buffer>& value, - const std::shared_ptr<DataType>& type, bool is_valid = true); +struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar<BinaryType> { + explicit BinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true) + : BaseBinaryScalar(value, binary(), is_valid) {} + + protected: + using BaseBinaryScalar::BaseBinaryScalar; }; struct ARROW_EXPORT StringScalar : public BinaryScalar { @@ -112,6 +114,24 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar { : BinaryScalar(value, utf8(), is_valid) {} }; +struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar<LargeBinaryType> { + explicit LargeBinaryScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true) + : BaseBinaryScalar(value, large_binary(), is_valid) {} + + protected: + using BaseBinaryScalar::BaseBinaryScalar; +}; + +struct ARROW_EXPORT LargeStringScalar : public LargeBinaryScalar { + explicit LargeStringScalar(const std::shared_ptr<Buffer>& value, bool is_valid = true) + : LargeBinaryScalar(value, large_utf8(), is_valid) {} +}; + +struct ARROW_EXPORT FixedSizeBinaryScalar : public BinaryScalar { + FixedSizeBinaryScalar(const std::shared_ptr<Buffer>& value, + const std::shared_ptr<DataType>& type, bool is_valid = true); +}; + class ARROW_EXPORT Date32Scalar : public NumericScalar { public: using NumericScalar::NumericScalar; diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc index daff0194fe5..69ec4ca5c60 100644 --- a/cpp/src/arrow/sparse_tensor-test.cc +++ b/cpp/src/arrow/sparse_tensor-test.cc @@ -182,6 +182,25 @@ TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) { AssertCOOIndex(sidx, 11, {1, 2, 3}); } +TEST(TestSparseCOOTensor, TensorEquality) { + std::vector<int64_t> shape = {2, 3, 4}; + std::vector<int64_t> values1 = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::vector<int64_t> values2 = {0, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::shared_ptr<Buffer> buffer1 = Buffer::Wrap(values1); + std::shared_ptr<Buffer> buffer2 = Buffer::Wrap(values2); + NumericTensor<Int64Type> tensor1(buffer1, shape); + NumericTensor<Int64Type> tensor2(buffer1, shape); + NumericTensor<Int64Type> tensor3(buffer2, shape); + SparseTensorImpl<SparseCOOIndex> st1(tensor1); + SparseTensorImpl<SparseCOOIndex> st2(tensor2); + SparseTensorImpl<SparseCOOIndex> st3(tensor3); + + ASSERT_TRUE(st1.Equals(st2)); + ASSERT_TRUE(!st1.Equals(st3)); +} + TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) { std::vector<int64_t> shape = {6, 4}; std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, @@ -269,4 +288,24 @@ TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values); } +TEST(TestSparseCSRMatrix, TensorEquality) { + std::vector<int64_t> shape = 
{6, 4}; + std::vector values1 = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, + 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; + std::vector values2 = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + std::shared_ptr buffer1 = Buffer::Wrap(values1); + std::shared_ptr buffer2 = Buffer::Wrap(values2); + NumericTensor tensor1(buffer1, shape); + NumericTensor tensor2(buffer1, shape); + NumericTensor tensor3(buffer2, shape); + SparseTensorImpl st1(tensor1); + SparseTensorImpl st2(tensor2); + SparseTensorImpl st3(tensor3); + + ASSERT_TRUE(st1.Equals(st2)); + ASSERT_TRUE(!st1.Equals(st3)); +} + } // namespace arrow diff --git a/cpp/src/arrow/status-test.cc b/cpp/src/arrow/status-test.cc index b7fc61f4801..b151e462b28 100644 --- a/cpp/src/arrow/status-test.cc +++ b/cpp/src/arrow/status-test.cc @@ -23,6 +23,16 @@ namespace arrow { +namespace { + +class TestStatusDetail : public StatusDetail { + public: + const char* type_id() const override { return "type_id"; } + std::string ToString() const override { return "a specific detail message"; } +}; + +} // namespace + TEST(StatusTest, TestCodeAndMessage) { Status ok = Status::OK(); ASSERT_EQ(StatusCode::OK, ok.code()); @@ -40,6 +50,25 @@ TEST(StatusTest, TestToString) { ASSERT_EQ(file_error.ToString(), ss.str()); } +TEST(StatusTest, TestToStringWithDetail) { + Status status(StatusCode::IOError, "summary", std::make_shared()); + ASSERT_EQ("IOError: summary. Detail: a specific detail message", status.ToString()); + + std::stringstream ss; + ss << status; + ASSERT_EQ(status.ToString(), ss.str()); +} + +TEST(StatusTest, TestWithDetail) { + Status status(StatusCode::IOError, "summary"); + auto detail = std::make_shared(); + Status new_status = status.WithDetail(detail); + + ASSERT_EQ(new_status.code(), status.code()); + ASSERT_EQ(new_status.message(), status.message()); + ASSERT_EQ(new_status.detail(), detail); +} + TEST(StatusTest, AndStatus) { Status a = Status::OK(); Status b = Status::OK(); diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc index cbb29119be6..785db459752 100644 --- a/cpp/src/arrow/status.cc +++ b/cpp/src/arrow/status.cc @@ -21,11 +21,17 @@ namespace arrow { -Status::Status(StatusCode code, const std::string& msg) { +Status::Status(StatusCode code, const std::string& msg) + : Status::Status(code, msg, nullptr) {} + +Status::Status(StatusCode code, std::string msg, std::shared_ptr detail) { ARROW_CHECK_NE(code, StatusCode::OK) << "Cannot construct ok status with message"; state_ = new State; state_->code = code; - state_->msg = msg; + state_->msg = std::move(msg); + if (detail != nullptr) { + state_->detail = std::move(detail); + } } void Status::CopyFrom(const Status& s) { @@ -77,21 +83,6 @@ std::string Status::CodeAsString() const { case StatusCode::SerializationError: type = "Serialization error"; break; - case StatusCode::PythonError: - type = "Python error"; - break; - case StatusCode::PlasmaObjectExists: - type = "Plasma object exists"; - break; - case StatusCode::PlasmaObjectNonexistent: - type = "Plasma object is nonexistent"; - break; - case StatusCode::PlasmaStoreFull: - type = "Plasma store is full"; - break; - case StatusCode::PlasmaObjectAlreadySealed: - type = "Plasma object is already sealed"; - break; case StatusCode::CodeGenError: type = "CodeGenError in Gandiva"; break; @@ -110,11 +101,16 @@ std::string Status::CodeAsString() const { std::string Status::ToString() const { std::string result(CodeAsString()); - if (state_ == NULL) { + if (state_ == nullptr) { return result; } result += ": "; 
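// With a StatusDetail attached (added in status.h below), the rendered string
// appends the detail after the message; status-test.cc asserts the exact form:
//   "IOError: summary. Detail: a specific detail message"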
result += state_->msg; + if (state_->detail != nullptr) { + result += ". Detail: "; + result += state_->detail->ToString(); + } + return result; } diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h index 1ed0da65fc4..b69040949ad 100644 --- a/cpp/src/arrow/status.h +++ b/cpp/src/arrow/status.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -85,17 +86,13 @@ enum class StatusCode : char { UnknownError = 9, NotImplemented = 10, SerializationError = 11, - PythonError = 12, RError = 13, - PlasmaObjectExists = 20, - PlasmaObjectNonexistent = 21, - PlasmaStoreFull = 22, - PlasmaObjectAlreadySealed = 23, - StillExecuting = 24, // Gandiva range of errors CodeGenError = 40, ExpressionValidationError = 41, - ExecutionError = 42 + ExecutionError = 42, + // Continue generic codes. + AlreadyExists = 45 }; #if defined(__clang__) @@ -103,6 +100,17 @@ enum class StatusCode : char { class ARROW_MUST_USE_RESULT ARROW_EXPORT Status; #endif +/// \brief An opaque class that allows subsystems to retain +/// additional information inside the Status. +class ARROW_EXPORT StatusDetail { + public: + virtual ~StatusDetail() = default; + // Return a unique id for the type of the StatusDetail + // (effectively a poor man's substitude for RTTI). + virtual const char* type_id() const = 0; + virtual std::string ToString() const = 0; +}; + /// \brief Status outcome object (success or error) /// /// The Status object is an object holding the outcome of an operation. @@ -124,6 +132,8 @@ class ARROW_EXPORT Status { } Status(StatusCode code, const std::string& msg); + /// \brief Pluggable constructor for use by sub-systems. detail cannot be null. + Status(StatusCode code, std::string msg, std::shared_ptr detail); // Copy the specified status. inline Status(const Status& s); @@ -221,32 +231,6 @@ class ARROW_EXPORT Status { return Status(StatusCode::RError, util::StringBuilder(std::forward(args)...)); } - template - static Status PlasmaObjectExists(Args&&... args) { - return Status(StatusCode::PlasmaObjectExists, - util::StringBuilder(std::forward(args)...)); - } - - template - static Status PlasmaObjectNonexistent(Args&&... args) { - return Status(StatusCode::PlasmaObjectNonexistent, - util::StringBuilder(std::forward(args)...)); - } - - template - static Status PlasmaObjectAlreadySealed(Args&&... args) { - return Status(StatusCode::PlasmaObjectAlreadySealed, - util::StringBuilder(std::forward(args)...)); - } - - template - static Status PlasmaStoreFull(Args&&... args) { - return Status(StatusCode::PlasmaStoreFull, - util::StringBuilder(std::forward(args)...)); - } - - static Status StillExecuting() { return Status(StatusCode::StillExecuting, ""); } - template static Status CodeGenError(Args&&... args) { return Status(StatusCode::CodeGenError, @@ -290,22 +274,6 @@ class ARROW_EXPORT Status { bool IsSerializationError() const { return code() == StatusCode::SerializationError; } /// Return true iff the status indicates a R-originated error. bool IsRError() const { return code() == StatusCode::RError; } - /// Return true iff the status indicates a Python-originated error. - bool IsPythonError() const { return code() == StatusCode::PythonError; } - /// Return true iff the status indicates an already existing Plasma object. - bool IsPlasmaObjectExists() const { return code() == StatusCode::PlasmaObjectExists; } - /// Return true iff the status indicates a non-existent Plasma object. 
- bool IsPlasmaObjectNonexistent() const { - return code() == StatusCode::PlasmaObjectNonexistent; - } - /// Return true iff the status indicates an already sealed Plasma object. - bool IsPlasmaObjectAlreadySealed() const { - return code() == StatusCode::PlasmaObjectAlreadySealed; - } - /// Return true iff the status indicates the Plasma store reached its capacity limit. - bool IsPlasmaStoreFull() const { return code() == StatusCode::PlasmaStoreFull; } - - bool IsStillExecuting() const { return code() == StatusCode::StillExecuting; } bool IsCodeGenError() const { return code() == StatusCode::CodeGenError; } @@ -330,6 +298,23 @@ class ARROW_EXPORT Status { /// \brief Return the specific error message attached to this status. std::string message() const { return ok() ? "" : state_->msg; } + /// \brief Return the status detail attached to this message. + std::shared_ptr detail() const { + return state_ == NULLPTR ? NULLPTR : state_->detail; + } + + /// \brief Return a new Status copying the existing status, but + /// updating with the existing detail. + Status WithDetail(std::shared_ptr new_detail) const { + return Status(code(), message(), std::move(new_detail)); + } + + /// \brief Return a new Status with changed message, copying the + /// existing status code and detail. + Status WithMessage(std::string message) const { + return Status(code(), std::move(message), detail()); + } + [[noreturn]] void Abort() const; [[noreturn]] void Abort(const std::string& message) const; @@ -341,6 +326,7 @@ class ARROW_EXPORT Status { struct State { StatusCode code; std::string msg; + std::shared_ptr detail; }; // OK status has a `NULL` state_. Otherwise, `state_` points to // a `State` structure containing the error code and message(s) diff --git a/cpp/src/arrow/stl.h b/cpp/src/arrow/stl.h index d641e39955b..37f3ce3fb3c 100644 --- a/cpp/src/arrow/stl.h +++ b/cpp/src/arrow/stl.h @@ -254,11 +254,11 @@ struct EnsureColumnTypes { if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) { compute::Datum casted; - ARROW_RETURN_NOT_OK(compute::Cast(ctx, compute::Datum(table.column(N - 1)->data()), + ARROW_RETURN_NOT_OK(compute::Cast(ctx, compute::Datum(table.column(N - 1)), expected_type, cast_options, &casted)); - std::shared_ptr new_column = std::make_shared( - table.schema()->field(N - 1)->WithType(expected_type), casted.chunked_array()); - ARROW_RETURN_NOT_OK(table.SetColumn(N - 1, new_column, table_owner)); + auto new_field = table.schema()->field(N - 1)->WithType(expected_type); + ARROW_RETURN_NOT_OK( + table.SetColumn(N - 1, new_field, casted.chunked_array(), table_owner)); *result = **table_owner; } @@ -286,7 +286,7 @@ struct TupleSetter { typename TypeTraits::ArrowType>::ArrayType; auto iter = rows->begin(); - const ChunkedArray& chunked_array = *table.column(N - 1)->data(); + const ChunkedArray& chunked_array = *table.column(N - 1); for (int i = 0; i < chunked_array.num_chunks(); i++) { const ArrayType& array = ::arrow::internal::checked_cast(*chunked_array.chunk(i)); diff --git a/cpp/src/arrow/table-test.cc b/cpp/src/arrow/table-test.cc index b0a870ee27d..a89cdc039f9 100644 --- a/cpp/src/arrow/table-test.cc +++ b/cpp/src/arrow/table-test.cc @@ -33,11 +33,6 @@ namespace arrow { -std::shared_ptr column(const std::shared_ptr& field, - const std::vector>& arrays) { - return std::make_shared(field, arrays); -} - class TestChunkedArray : public TestBase { protected: virtual void Construct() { @@ -161,103 +156,6 @@ TEST_F(TestChunkedArray, Validate) { ASSERT_RAISES(Invalid, one_->Validate()); } 
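A short sketch of attaching and reading back a detail through the Status API above (TransportDetail and Send are illustrative names, not part of the patch):

#include <memory>
#include <string>
#include "arrow/status.h"

class TransportDetail : public arrow::StatusDetail {
 public:
  const char* type_id() const override { return "transport-detail"; }
  std::string ToString() const override { return "connection reset by peer"; }
};

arrow::Status Send() {
  // WithDetail keeps the code and message, layering subsystem context on top;
  // callers can branch on detail()->type_id() instead of parsing the message.
  return arrow::Status::IOError("send failed")
      .WithDetail(std::make_shared<TransportDetail>());
}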
-class TestColumn : public TestChunkedArray { - protected: - void Construct() override { - TestChunkedArray::Construct(); - - one_col_ = std::make_shared(one_field_, one_); - another_col_ = std::make_shared(another_field_, another_); - } - - std::shared_ptr data_; - std::unique_ptr column_; - - std::shared_ptr one_field_; - std::shared_ptr another_field_; - - std::shared_ptr one_col_; - std::shared_ptr another_col_; -}; - -TEST_F(TestColumn, BasicAPI) { - ArrayVector arrays; - arrays.push_back(MakeRandomArray(100)); - arrays.push_back(MakeRandomArray(100, 10)); - arrays.push_back(MakeRandomArray(100, 20)); - - auto f0 = field("c0", int32()); - column_.reset(new Column(f0, arrays)); - - ASSERT_EQ("c0", column_->name()); - ASSERT_TRUE(column_->type()->Equals(int32())); - ASSERT_EQ(300, column_->length()); - ASSERT_EQ(30, column_->null_count()); - ASSERT_EQ(3, column_->data()->num_chunks()); -} - -TEST_F(TestColumn, ChunksInhomogeneous) { - ArrayVector arrays; - arrays.push_back(MakeRandomArray(100)); - arrays.push_back(MakeRandomArray(100, 10)); - - auto f0 = field("c0", int32()); - column_.reset(new Column(f0, arrays)); - - ASSERT_OK(column_->ValidateData()); - - arrays.push_back(MakeRandomArray(100, 10)); - column_.reset(new Column(f0, arrays)); - ASSERT_RAISES(Invalid, column_->ValidateData()); -} - -TEST_F(TestColumn, SliceEquals) { - arrays_one_.push_back(MakeRandomArray(100)); - arrays_one_.push_back(MakeRandomArray(50)); - arrays_one_.push_back(MakeRandomArray(50)); - one_field_ = field("column", int32()); - Construct(); - - std::shared_ptr slice = one_col_->Slice(125, 50); - ASSERT_EQ(slice->length(), 50); - ASSERT_TRUE(slice->Equals(one_col_->Slice(125, 50))); - - std::shared_ptr slice2 = one_col_->Slice(75)->Slice(25)->Slice(25, 50); - ASSERT_EQ(slice2->length(), 50); - ASSERT_TRUE(slice2->Equals(slice)); -} - -TEST_F(TestColumn, Equals) { - std::vector null_bitmap(100, true); - std::vector data(100, 1); - std::shared_ptr array; - ArrayFromVector(null_bitmap, data, &array); - arrays_one_.push_back(array); - arrays_another_.push_back(array); - - one_field_ = field("column", int32()); - another_field_ = field("column", int32()); - - Construct(); - ASSERT_TRUE(one_col_->Equals(one_col_)); - ASSERT_FALSE(one_col_->Equals(nullptr)); - ASSERT_TRUE(one_col_->Equals(another_col_)); - ASSERT_TRUE(one_col_->Equals(*another_col_.get())); - - // Field is different - another_field_ = field("two", int32()); - Construct(); - ASSERT_FALSE(one_col_->Equals(another_col_)); - ASSERT_FALSE(one_col_->Equals(*another_col_.get())); - - // ChunkedArray is different - another_field_ = field("column", int32()); - arrays_another_.push_back(array); - Construct(); - ASSERT_FALSE(one_col_->Equals(another_col_)); - ASSERT_FALSE(one_col_->Equals(*another_col_.get())); -} - class TestTable : public TestBase { public: void MakeExample1(int length) { @@ -271,9 +169,9 @@ class TestTable : public TestBase { arrays_ = {MakeRandomArray(length), MakeRandomArray(length), MakeRandomArray(length)}; - columns_ = {std::make_shared(schema_->field(0), arrays_[0]), - std::make_shared(schema_->field(1), arrays_[1]), - std::make_shared(schema_->field(2), arrays_[2])}; + columns_ = {std::make_shared(arrays_[0]), + std::make_shared(arrays_[1]), + std::make_shared(arrays_[2])}; } protected: @@ -281,7 +179,7 @@ class TestTable : public TestBase { std::shared_ptr schema_; std::vector> arrays_; - std::vector> columns_; + std::vector> columns_; }; TEST_F(TestTable, EmptySchema) { @@ -323,7 +221,6 @@ TEST_F(TestTable, Metadata) { 
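// table_->column(0) below now returns a bare ChunkedArray, which carries
// chunks and a type but no name; the old name assertion is gone, and column
// names are reachable only through schema_->field(i)->name().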
ASSERT_TRUE(table_->schema()->Equals(*schema_)); auto col = table_->column(0); - ASSERT_EQ(schema_->field(0)->name(), col->name()); ASSERT_EQ(schema_->field(0)->type(), col->type()); } @@ -341,11 +238,9 @@ TEST_F(TestTable, InvalidColumns) { table_ = Table::Make(schema_, columns_, length); ASSERT_RAISES(Invalid, table_->Validate()); - columns_ = { - std::make_shared(schema_->field(0), MakeRandomArray(length)), - std::make_shared(schema_->field(1), MakeRandomArray(length)), - std::make_shared(schema_->field(2), - MakeRandomArray(length - 1))}; + columns_ = {std::make_shared(MakeRandomArray(length)), + std::make_shared(MakeRandomArray(length)), + std::make_shared(MakeRandomArray(length - 1))}; table_ = Table::Make(schema_, columns_, length); ASSERT_RAISES(Invalid, table_->Validate()); @@ -367,13 +262,10 @@ TEST_F(TestTable, Equals) { auto other = Table::Make(other_schema, columns_); ASSERT_FALSE(table_->Equals(*other)); // Differing columns - std::vector> other_columns = { - std::make_shared(schema_->field(0), - MakeRandomArray(length, 10)), - std::make_shared(schema_->field(1), - MakeRandomArray(length, 10)), - std::make_shared(schema_->field(2), - MakeRandomArray(length, 10))}; + std::vector> other_columns = { + std::make_shared(MakeRandomArray(length, 10)), + std::make_shared(MakeRandomArray(length, 10)), + std::make_shared(MakeRandomArray(length, 10))}; other = Table::Make(schema_, other_columns); ASSERT_FALSE(table_->Equals(*other)); @@ -391,10 +283,10 @@ TEST_F(TestTable, FromRecordBatches) { expected = Table::Make(schema_, columns_); ASSERT_TRUE(result->Equals(*expected)); - std::vector> other_columns; + std::vector> other_columns; for (int i = 0; i < schema_->num_fields(); ++i) { std::vector> col_arrays = {arrays_[i], arrays_[i]}; - other_columns.push_back(std::make_shared(schema_->field(i), col_arrays)); + other_columns.push_back(std::make_shared(col_arrays)); } ASSERT_OK(Table::FromRecordBatches({batch1, batch1}, &result)); @@ -446,7 +338,7 @@ TEST_F(TestTable, CombineChunks) { std::shared_ptr
<Table> table; ASSERT_OK(Table::FromRecordBatches({batch1, batch2}, &table)); for (int i = 0; i < table->num_columns(); ++i) { - ASSERT_EQ(2, table->column(i)->data()->num_chunks()); + ASSERT_EQ(2, table->column(i)->num_chunks()); } std::shared_ptr<Table>
compacted; @@ -454,7 +346,7 @@ EXPECT_TRUE(compacted->Equals(*table)); for (int i = 0; i < compacted->num_columns(); ++i) { - EXPECT_EQ(1, compacted->column(i)->data()->num_chunks()); + EXPECT_EQ(1, compacted->column(i)->num_chunks()); } } @@ -517,7 +409,8 @@ TEST_F(TestTable, RemoveColumn) { ASSERT_OK(table.RemoveColumn(0, &result)); auto ex_schema = ::arrow::schema({schema_->field(1), schema_->field(2)}); - std::vector<std::shared_ptr<Column>> ex_columns = {table.column(1), table.column(2)}; + std::vector<std::shared_ptr<ChunkedArray>> ex_columns = {table.column(1), + table.column(2)}; auto expected = Table::Make(ex_schema, ex_columns); ASSERT_TRUE(result->Equals(*expected)); @@ -544,14 +437,13 @@ TEST_F(TestTable, SetColumn) { const Table& table = *table_sp; std::shared_ptr<Table>
result; - ASSERT_OK(table.SetColumn(0, table.column(1), &result)); + ASSERT_OK(table.SetColumn(0, schema_->field(1), table.column(1), &result)); auto ex_schema = ::arrow::schema({schema_->field(1), schema_->field(1), schema_->field(2)}); - std::vector<std::shared_ptr<Column>> ex_columns = {table.column(1), table.column(1), - table.column(2)}; - auto expected = Table::Make(ex_schema, ex_columns); + auto expected = + Table::Make(ex_schema, {table.column(1), table.column(1), table.column(2)}); ASSERT_TRUE(result->Equals(*expected)); } @@ -576,7 +468,7 @@ TEST_F(TestTable, RemoveColumnEmpty) { auto schema = ::arrow::schema({f0}); auto a0 = MakeRandomArray<Int32Array>(length); - auto table = Table::Make(schema, {std::make_shared<Column>(f0, a0)}); + auto table = Table::Make(schema, {std::make_shared<ChunkedArray>(a0)}); std::shared_ptr<Table>
empty; ASSERT_OK(table->RemoveColumn(0, &empty)); @@ -584,7 +476,7 @@ ASSERT_EQ(table->num_rows(), empty->num_rows()); std::shared_ptr<Table>
added; - ASSERT_OK(empty->AddColumn(0, table->column(0), &added)); + ASSERT_OK(empty->AddColumn(0, f0, table->column(0), &added)); ASSERT_EQ(table->num_rows(), added->num_rows()); } @@ -595,23 +487,25 @@ TEST_F(TestTable, AddColumn) { auto table_sp = Table::Make(schema_, columns_); const Table& table = *table_sp; + auto f0 = schema_->field(0); + std::shared_ptr<Table>
result; // Some negative tests with invalid index - Status status = table.AddColumn(10, columns_[0], &result); + Status status = table.AddColumn(10, f0, columns_[0], &result); ASSERT_TRUE(status.IsInvalid()); - status = table.AddColumn(4, columns_[0], &result); + status = table.AddColumn(4, f0, columns_[0], &result); ASSERT_TRUE(status.IsInvalid()); - status = table.AddColumn(-1, columns_[0], &result); + status = table.AddColumn(-1, f0, columns_[0], &result); ASSERT_TRUE(status.IsInvalid()); // Add column with wrong length - auto longer_col = std::make_shared<Column>(schema_->field(0), - MakeRandomArray<Int32Array>(length + 1)); + auto longer_col = + std::make_shared<ChunkedArray>(MakeRandomArray<Int32Array>(length + 1)); status = table.AddColumn(0, f0, longer_col, &result); ASSERT_TRUE(status.IsInvalid()); // Add column 0 in different places - ASSERT_OK(table.AddColumn(0, columns_[0], &result)); + ASSERT_OK(table.AddColumn(0, f0, columns_[0], &result)); auto ex_schema = ::arrow::schema( {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}); @@ -619,7 +513,7 @@ ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)}); ASSERT_TRUE(result->Equals(*expected)); - ASSERT_OK(table.AddColumn(1, columns_[0], &result)); + ASSERT_OK(table.AddColumn(1, f0, columns_[0], &result)); ex_schema = ::arrow::schema( {schema_->field(0), schema_->field(0), schema_->field(1), schema_->field(2)}); @@ -627,14 +521,14 @@ ex_schema, {table.column(0), table.column(0), table.column(1), table.column(2)}); ASSERT_TRUE(result->Equals(*expected)); - ASSERT_OK(table.AddColumn(2, columns_[0], &result)); + ASSERT_OK(table.AddColumn(2, f0, columns_[0], &result)); ex_schema = ::arrow::schema( {schema_->field(0), schema_->field(1), schema_->field(0), schema_->field(2)}); expected = Table::Make( ex_schema, {table.column(0), table.column(1), table.column(0), table.column(2)}); ASSERT_TRUE(result->Equals(*expected)); - ASSERT_OK(table.AddColumn(3, columns_[0], &result)); + ASSERT_OK(table.AddColumn(3, f0, columns_[0], &result)); ex_schema = ::arrow::schema( {schema_->field(0), schema_->field(1), schema_->field(2), schema_->field(0)}); expected = Table::Make( @@ -844,11 +738,14 @@ TEST_F(TestTableBatchReader, ReadNext) { auto sch1 = arrow::schema({field("f1", int32()), field("f2", int32())}); - std::vector<std::shared_ptr<Column>> columns; + std::vector<std::shared_ptr<ChunkedArray>> columns; std::shared_ptr<RecordBatch> batch; - columns = {column(sch1->field(0), {a1, a4, a2}), column(sch1->field(1), {a2, a2})}; + std::vector<std::shared_ptr<Array>> arrays_1 = {a1, a4, a2}; + std::vector<std::shared_ptr<Array>> arrays_2 = {a2, a2}; + columns = {std::make_shared<ChunkedArray>(arrays_1), + std::make_shared<ChunkedArray>(arrays_2)}; auto t1 = Table::Make(sch1, columns); TableBatchReader i1(*t1); @@ -865,7 +762,10 @@ ASSERT_OK(i1.ReadNext(&batch)); ASSERT_EQ(nullptr, batch); - columns = {column(sch1->field(0), {a1}), column(sch1->field(1), {a4})}; + arrays_1 = {a1}; + arrays_2 = {a4}; + columns = {std::make_shared<ChunkedArray>(arrays_1), + std::make_shared<ChunkedArray>(arrays_2)}; auto t2 = Table::Make(sch1, columns); TableBatchReader i2(*t2); @@ -887,7 +787,9 @@ TEST_F(TestTableBatchReader, Chunksize) { auto a3 = MakeRandomArray<Int32Array>(10); auto sch1 = arrow::schema({field("f1", int32())}); + + std::vector<std::shared_ptr<Array>> arrays = {a1, a2, a3}; + auto t1 = Table::Make(sch1, {std::make_shared<ChunkedArray>(arrays)}); TableBatchReader i1(*t1); diff --git a/cpp/src/arrow/table.cc index 
5c58adcd740..907cc8c2241 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -36,27 +36,8 @@ namespace arrow { using internal::checked_cast; -namespace { - -// If a column contains multiple chunks, concatenates those chunks into one and -// makes a new column out of it. Otherwise makes `compacted` point to the same -// column. -Status CompactColumn(const std::shared_ptr& column, MemoryPool* pool, - std::shared_ptr* compacted) { - if (column->data()->num_chunks() <= 1) { - *compacted = column; - return Status::OK(); - } - std::shared_ptr merged_data_array; - RETURN_NOT_OK(Concatenate(column->data()->chunks(), pool, &merged_data_array)); - *compacted = std::make_shared(column->field(), merged_data_array); - return Status::OK(); -} - -} // namespace - // ---------------------------------------------------------------------- -// ChunkedArray and Column methods +// ChunkedArray methods ChunkedArray::ChunkedArray(const ArrayVector& chunks) : chunks_(chunks) { length_ = 0; @@ -211,75 +192,6 @@ Status ChunkedArray::Validate() const { return Status::OK(); } -// ---------------------------------------------------------------------- - -Column::Column(const std::shared_ptr& field, const ArrayVector& chunks) - : field_(field) { - data_ = std::make_shared(chunks, field->type()); -} - -Column::Column(const std::shared_ptr& field, const std::shared_ptr& data) - : field_(field) { - if (!data) { - data_ = std::make_shared(ArrayVector({}), field->type()); - } else { - data_ = std::make_shared(ArrayVector({data}), field->type()); - } -} - -Column::Column(const std::string& name, const std::shared_ptr& data) - : Column(::arrow::field(name, data->type()), data) {} - -Column::Column(const std::string& name, const std::shared_ptr& data) - : Column(::arrow::field(name, data->type()), data) {} - -Column::Column(const std::shared_ptr& field, - const std::shared_ptr& data) - : field_(field), data_(data) {} - -Status Column::Flatten(MemoryPool* pool, - std::vector>* out) const { - std::vector> flattened; - std::vector> flattened_fields = field_->Flatten(); - std::vector> flattened_data; - RETURN_NOT_OK(data_->Flatten(pool, &flattened_data)); - DCHECK_EQ(flattened_fields.size(), flattened_data.size()); - for (size_t i = 0; i < flattened_fields.size(); ++i) { - flattened.push_back(std::make_shared(flattened_fields[i], flattened_data[i])); - } - *out = flattened; - return Status::OK(); -} - -bool Column::Equals(const Column& other) const { - if (!field_->Equals(other.field())) { - return false; - } - return data_->Equals(other.data()); -} - -bool Column::Equals(const std::shared_ptr& other) const { - if (this == other.get()) { - return true; - } - if (!other) { - return false; - } - - return Equals(*other.get()); -} - -Status Column::ValidateData() { - for (int i = 0; i < data_->num_chunks(); ++i) { - std::shared_ptr type = data_->chunk(i)->type(); - if (!this->type()->Equals(type)) { - return Status::Invalid("In chunk ", i, " expected type ", this->type()->ToString(), - " but saw ", type->ToString()); - } - } - return Status::OK(); -} - // ---------------------------------------------------------------------- // Table methods @@ -288,7 +200,8 @@ Status Column::ValidateData() { class SimpleTable : public Table { public: SimpleTable(const std::shared_ptr& schema, - const std::vector>& columns, int64_t num_rows = -1) + const std::vector>& columns, + int64_t num_rows = -1) : columns_(columns) { schema_ = schema; if (num_rows < 0) { @@ -317,12 +230,11 @@ class SimpleTable : public Table { 
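Under the revised API a table is a schema plus bare ChunkedArrays, and AddColumn/SetColumn take the Field and the data separately since a ChunkedArray no longer knows its name. A minimal construction sketch (MakeExampleTable is an illustrative helper, not part of the patch):

#include <memory>
#include <vector>
#include "arrow/api.h"

arrow::Status MakeExampleTable(std::shared_ptr<arrow::Table>* out) {
  arrow::Int64Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
  std::shared_ptr<arrow::Array> chunk;
  ARROW_RETURN_NOT_OK(builder.Finish(&chunk));
  // A column is now just chunked data; its name/type pair lives in the schema.
  auto column = std::make_shared<arrow::ChunkedArray>(
      arrow::ArrayVector{chunk, chunk});  // two chunks -> six rows
  auto schema = arrow::schema({arrow::field("x", arrow::int64())});
  *out = arrow::Table::Make(schema, {column});
  return arrow::Status::OK();
}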
columns_.resize(columns.size()); for (size_t i = 0; i < columns.size(); ++i) { - columns_[i] = - std::make_shared(schema->field(static_cast(i)), columns[i]); + columns_[i] = std::make_shared(columns[i]); } } - std::shared_ptr column(int i) const override { return columns_[i]; } + std::shared_ptr column(int i) const override { return columns_[i]; } std::shared_ptr
Slice(int64_t offset, int64_t length) const override { auto sliced = columns_; @@ -343,7 +255,8 @@ class SimpleTable : public Table { return Status::OK(); } - Status AddColumn(int i, const std::shared_ptr& col, + Status AddColumn(int i, std::shared_ptr field_arg, + std::shared_ptr col, std::shared_ptr
* out) const override { DCHECK(col != nullptr); @@ -353,14 +266,20 @@ class SimpleTable : public Table { " but got length ", col->length()); } + if (!field_arg->type()->Equals(col->type())) { + return Status::Invalid("Field type did not match data type"); + } + std::shared_ptr new_schema; - RETURN_NOT_OK(schema_->AddField(i, col->field(), &new_schema)); + RETURN_NOT_OK(schema_->AddField(i, field_arg, &new_schema)); - *out = Table::Make(new_schema, internal::AddVectorElement(columns_, i, col)); + *out = + Table::Make(new_schema, internal::AddVectorElement(columns_, i, std::move(col))); return Status::OK(); } - Status SetColumn(int i, const std::shared_ptr& col, + Status SetColumn(int i, std::shared_ptr field_arg, + std::shared_ptr col, std::shared_ptr
* out) const override { DCHECK(col != nullptr); @@ -370,10 +289,14 @@ class SimpleTable : public Table { " but got length ", col->length()); } - std::shared_ptr new_schema; - RETURN_NOT_OK(schema_->SetField(i, col->field(), &new_schema)); + if (!field_arg->type()->Equals(col->type())) { + return Status::Invalid("Field type did not match data type"); + } - *out = Table::Make(new_schema, internal::ReplaceVectorElement(columns_, i, col)); + std::shared_ptr new_schema; + RETURN_NOT_OK(schema_->SetField(i, field_arg, &new_schema)); + *out = Table::Make(new_schema, + internal::ReplaceVectorElement(columns_, i, std::move(col))); return Status::OK(); } @@ -385,13 +308,15 @@ class SimpleTable : public Table { Status Flatten(MemoryPool* pool, std::shared_ptr
* out) const override { std::vector> flattened_fields; - std::vector> flattened_columns; - for (const auto& column : columns_) { - std::vector> new_columns; - RETURN_NOT_OK(column->Flatten(pool, &new_columns)); - for (const auto& new_col : new_columns) { - flattened_fields.push_back(new_col->field()); - flattened_columns.push_back(new_col); + std::vector> flattened_columns; + for (int i = 0; i < num_columns(); ++i) { + std::vector> new_columns; + std::vector> new_fields = field(i)->Flatten(); + RETURN_NOT_OK(column(i)->Flatten(pool, &new_columns)); + DCHECK_EQ(new_columns.size(), new_fields.size()); + for (size_t j = 0; j < new_columns.size(); ++j) { + flattened_fields.push_back(new_fields[j]); + flattened_columns.push_back(new_columns[j]); } } auto flattened_schema = @@ -406,48 +331,41 @@ class SimpleTable : public Table { return Status::Invalid("Number of columns did not match schema"); } for (int i = 0; i < num_columns(); ++i) { - const Column* col = columns_[i].get(); + const ChunkedArray* col = columns_[i].get(); if (col == nullptr) { return Status::Invalid("Column ", i, " was null"); } - if (!col->field()->Equals(*schema_->field(i))) { - return Status::Invalid("Column field ", i, " named ", col->name(), - " is inconsistent with schema"); + if (!col->type()->Equals(*schema_->field(i)->type())) { + return Status::Invalid("Column data for field ", i, " with type ", + col->type()->ToString(), " is inconsistent with schema ", + schema_->field(i)->type()->ToString()); } } // Make sure columns are all the same length for (int i = 0; i < num_columns(); ++i) { - const Column* col = columns_[i].get(); + const ChunkedArray* col = columns_[i].get(); if (col->length() != num_rows_) { - return Status::Invalid("Column ", i, " named ", col->name(), " expected length ", - num_rows_, " but got length ", col->length()); + return Status::Invalid("Column ", i, " named ", field(i)->name(), + " expected length ", num_rows_, " but got length ", + col->length()); } } return Status::OK(); } private: - std::vector> columns_; + std::vector> columns_; }; Table::Table() : num_rows_(0) {} -std::shared_ptr
Table::Make(const std::shared_ptr& schema, - const std::vector>& columns, - int64_t num_rows) { +std::shared_ptr
Table::Make( + const std::shared_ptr& schema, + const std::vector>& columns, int64_t num_rows) { return std::make_shared(schema, columns, num_rows); } -std::shared_ptr
Table::Make(const std::vector>& columns, - int64_t num_rows) { - std::vector> fields(columns.size()); - std::transform(columns.begin(), columns.end(), fields.begin(), - [](const std::shared_ptr& column) { return column->field(); }); - return std::make_shared(::arrow::schema(std::move(fields)), columns, - num_rows); -} - std::shared_ptr
Table::Make(const std::shared_ptr& schema, const std::vector>& arrays, int64_t num_rows) { @@ -468,14 +386,14 @@ Status Table::FromRecordBatches(const std::shared_ptr& schema, } } - std::vector> columns(ncolumns); + std::vector> columns(ncolumns); std::vector> column_arrays(nbatches); for (int i = 0; i < ncolumns; ++i) { for (int j = 0; j < nbatches; ++j) { column_arrays[j] = batches[j]->column(i); } - columns[i] = std::make_shared(schema->field(i), column_arrays); + columns[i] = std::make_shared(column_arrays, schema->field(i)->type()); } *table = Table::Make(schema, columns); @@ -501,14 +419,14 @@ Status Table::FromChunkedStructArray(const std::shared_ptr& array, int num_chunks = array->num_chunks(); const auto& struct_chunks = array->chunks(); - std::vector> columns(num_columns); + std::vector> columns(num_columns); for (int i = 0; i < num_columns; ++i) { ArrayVector chunks(num_chunks); std::transform(struct_chunks.begin(), struct_chunks.end(), chunks.begin(), [i](const std::shared_ptr& struct_chunk) { return static_cast(*struct_chunk).field(i); }); - columns[i] = std::make_shared(type->child(i), chunks); + columns[i] = std::make_shared(chunks); } *table = Table::Make(::arrow::schema(type->children()), columns, array->length()); @@ -518,7 +436,7 @@ Status Table::FromChunkedStructArray(const std::shared_ptr& array, std::vector Table::ColumnNames() const { std::vector names(num_columns()); for (int i = 0; i < num_columns(); ++i) { - names[i] = column(i)->name(); + names[i] = field(i)->name(); } return names; } @@ -529,11 +447,11 @@ Status Table::RenameColumns(const std::vector& names, return Status::Invalid("tried to rename a table of ", num_columns(), " columns but only ", names.size(), " names were provided"); } - std::vector> columns(num_columns()); + std::vector> columns(num_columns()); std::vector> fields(num_columns()); for (int i = 0; i < num_columns(); ++i) { - fields[i] = column(i)->field()->WithName(names[i]); - columns[i] = std::make_shared(fields[i], column(i)->data()); + columns[i] = column(i); + fields[i] = field(i)->WithName(names[i]); } *out = Table::Make(::arrow::schema(std::move(fields)), std::move(columns), num_rows()); return Status::OK(); @@ -558,17 +476,16 @@ Status ConcatenateTables(const std::vector>& tables, } } - std::vector> columns(ncolumns); + std::vector> columns(ncolumns); for (int i = 0; i < ncolumns; ++i) { std::vector> column_arrays; for (int j = 0; j < ntables; ++j) { - const std::vector>& chunks = - tables[j]->column(i)->data()->chunks(); + const std::vector>& chunks = tables[j]->column(i)->chunks(); for (const auto& chunk : chunks) { column_arrays.push_back(chunk); } } - columns[i] = std::make_shared(schema->field(i), column_arrays); + columns[i] = std::make_shared(column_arrays); } *table = Table::Make(schema, columns); return Status::OK(); @@ -595,9 +512,16 @@ bool Table::Equals(const Table& other) const { Status Table::CombineChunks(MemoryPool* pool, std::shared_ptr
* out) const { const int ncolumns = num_columns(); - std::vector> compacted_columns(ncolumns); + std::vector> compacted_columns(ncolumns); for (int i = 0; i < ncolumns; ++i) { - RETURN_NOT_OK(CompactColumn(column(i), pool, &compacted_columns[i])); + auto col = column(i); + if (col->num_chunks() <= 1) { + compacted_columns[i] = col; + } else { + std::shared_ptr compacted; + RETURN_NOT_OK(Concatenate(col->chunks(), pool, &compacted)); + compacted_columns[i] = std::make_shared(compacted); + } } *out = Table::Make(schema(), compacted_columns); return Status::OK(); @@ -616,7 +540,7 @@ class TableBatchReader::TableBatchReaderImpl { absolute_row_position_(0), max_chunksize_(std::numeric_limits::max()) { for (int i = 0; i < table.num_columns(); ++i) { - column_data_[i] = table.column(i)->data().get(); + column_data_[i] = table.column(i).get(); } } diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index 2e7dcee904c..6a3bdc5ca31 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -111,95 +111,6 @@ class ARROW_EXPORT ChunkedArray { ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray); }; -/// \class Column -/// \brief An immutable column data structure consisting of a field (type -/// metadata) and a chunked data array -class ARROW_EXPORT Column { - public: - /// \brief Construct a column from a vector of arrays - /// - /// The array chunks' datatype must match the field's datatype. - Column(const std::shared_ptr& field, const ArrayVector& chunks); - /// \brief Construct a column from a chunked array - /// - /// The chunked array's datatype must match the field's datatype. - Column(const std::shared_ptr& field, const std::shared_ptr& data); - /// \brief Construct a column from a single array - /// - /// The array's datatype must match the field's datatype. - Column(const std::shared_ptr& field, const std::shared_ptr& data); - - /// \brief Construct a column from a name and an array - /// - /// A field with the given name and the array's datatype is automatically created. - Column(const std::string& name, const std::shared_ptr& data); - /// \brief Construct a column from a name and a chunked array - /// - /// A field with the given name and the array's datatype is automatically created. - Column(const std::string& name, const std::shared_ptr& data); - - int64_t length() const { return data_->length(); } - - int64_t null_count() const { return data_->null_count(); } - - std::shared_ptr field() const { return field_; } - - /// \brief The column name - /// \return the column's name in the passed metadata - const std::string& name() const { return field_->name(); } - - /// \brief The column type - /// \return the column's type according to the metadata - std::shared_ptr type() const { return field_->type(); } - - /// \brief The column data as a chunked array - /// \return the column's data as a chunked logical array - std::shared_ptr data() const { return data_; } - - /// \brief Construct a zero-copy slice of the column with the indicated - /// offset and length - /// - /// \param[in] offset the position of the first element in the constructed - /// slice - /// \param[in] length the length of the slice. 
If there are not enough - /// elements in the column, the length will be adjusted accordingly - /// - /// \return a new object wrapped in std::shared_ptr - std::shared_ptr Slice(int64_t offset, int64_t length) const { - return std::make_shared(field_, data_->Slice(offset, length)); - } - - /// \brief Slice from offset until end of the column - std::shared_ptr Slice(int64_t offset) const { - return std::make_shared(field_, data_->Slice(offset)); - } - - /// \brief Flatten this column as a vector of columns - /// - /// \param[in] pool The pool for buffer allocations, if any - /// \param[out] out The resulting vector of arrays - Status Flatten(MemoryPool* pool, std::vector>* out) const; - - /// \brief Determine if two columns are equal. - /// - /// Two columns can be equal only if they have equal datatypes. - /// However, they may be equal even if they have different chunkings. - bool Equals(const Column& other) const; - /// \brief Determine if the two columns are equal. - bool Equals(const std::shared_ptr& other) const; - - /// \brief Verify that the column's array data is consistent with the passed - /// field's metadata - Status ValidateData(); - - protected: - std::shared_ptr field_; - std::shared_ptr data_; - - private: - ARROW_DISALLOW_COPY_AND_ASSIGN(Column); -}; - /// \class Table /// \brief Logical table as sequence of chunked arrays class ARROW_EXPORT Table { @@ -209,18 +120,11 @@ class ARROW_EXPORT Table { /// \brief Construct a Table from schema and columns /// If columns is zero-length, the table's number of rows is zero /// \param schema The table schema (column types) - /// \param columns The table's columns + /// \param columns The table's columns as chunked arrays /// \param num_rows number of rows in table, -1 (default) to infer from columns - static std::shared_ptr
Make(const std::shared_ptr& schema, - const std::vector>& columns, - int64_t num_rows = -1); - - /// \brief Construct a Table from columns, schema is assembled from column fields - /// If columns is zero-length, the table's number of rows is zero - /// \param columns The table's columns - /// \param num_rows number of rows in table, -1 (default) to infer from columns - static std::shared_ptr
Make(const std::vector>& columns, - int64_t num_rows = -1); + static std::shared_ptr
Make( + const std::shared_ptr& schema, + const std::vector>& columns, int64_t num_rows = -1); /// \brief Construct a Table from schema and arrays /// \param schema The table schema (column types) @@ -265,7 +169,10 @@ class ARROW_EXPORT Table { std::shared_ptr schema() const { return schema_; } /// Return a column by index - virtual std::shared_ptr column(int i) const = 0; + virtual std::shared_ptr column(int i) const = 0; + + /// Return a column's field by index + std::shared_ptr field(int i) const { return schema_->field(i); } /// \brief Construct a zero-copy slice of the table with the /// indicated offset and length @@ -284,7 +191,7 @@ class ARROW_EXPORT Table { /// \brief Return a column by name /// \param[in] name field name /// \return an Array or null if no field was found - std::shared_ptr GetColumnByName(const std::string& name) const { + std::shared_ptr GetColumnByName(const std::string& name) const { auto i = schema_->GetFieldIndex(name); return i == -1 ? NULLPTR : column(i); } @@ -293,11 +200,13 @@ class ARROW_EXPORT Table { virtual Status RemoveColumn(int i, std::shared_ptr
* out) const = 0; /// \brief Add column to the table, producing a new Table - virtual Status AddColumn(int i, const std::shared_ptr& column, + virtual Status AddColumn(int i, std::shared_ptr field_arg, + std::shared_ptr column, std::shared_ptr
* out) const = 0; /// \brief Replace a column in the table, producing a new Table - virtual Status SetColumn(int i, const std::shared_ptr& column, + virtual Status SetColumn(int i, std::shared_ptr field_arg, + std::shared_ptr column, std::shared_ptr
* out) const = 0;
/// \brief Return names of all columns
diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc
index 36e97434d28..4638cd7739b 100644
--- a/cpp/src/arrow/tensor-test.cc
+++ b/cpp/src/arrow/tensor-test.cc
@@ -155,6 +155,56 @@ TEST(TestTensor, CountNonZeroForNonContiguousTensor) {
   AssertCountNonZero(t, 8);
 }
 
+TEST(TestTensor, Equals) {
+  std::vector<int64_t> shape = {4, 4};
+
+  std::vector<int64_t> c_values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  std::vector<int64_t> c_strides = {32, 8};
+  Tensor tc1(int64(), Buffer::Wrap(c_values), shape, c_strides);
+  Tensor tc2(int64(), Buffer::Wrap(c_values), shape, c_strides);
+
+  std::vector<int64_t> f_values = {1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 4, 8, 12, 16};
+  Tensor tc3(int64(), Buffer::Wrap(f_values), shape, c_strides);
+
+  std::vector<int64_t> f_strides = {8, 32};
+  Tensor tf1(int64(), Buffer::Wrap(f_values), shape, f_strides);
+  Tensor tf2(int64(), Buffer::Wrap(c_values), shape, f_strides);
+
+  std::vector<int64_t> nc_values = {1, 0, 5, 0, 9, 0, 13, 0, 2, 0, 6, 0, 10, 0, 14, 0,
+                                    3, 0, 7, 0, 11, 0, 15, 0, 4, 0, 8, 0, 12, 0, 16, 0};
+  std::vector<int64_t> nc_strides = {16, 64};
+  Tensor tnc(int64(), Buffer::Wrap(nc_values), shape, nc_strides);
+
+  ASSERT_TRUE(tc1.is_contiguous());
+  ASSERT_TRUE(tc1.is_row_major());
+
+  ASSERT_TRUE(tf1.is_contiguous());
+  ASSERT_TRUE(tf1.is_column_major());
+
+  ASSERT_FALSE(tnc.is_contiguous());
+
+  // same object
+  EXPECT_TRUE(tc1.Equals(tc1));
+  EXPECT_TRUE(tf1.Equals(tf1));
+  EXPECT_TRUE(tnc.Equals(tnc));
+
+  // different objects
+  EXPECT_TRUE(tc1.Equals(tc2));
+  EXPECT_FALSE(tc1.Equals(tc3));
+
+  // row-major and column-major
+  EXPECT_TRUE(tc1.Equals(tf1));
+  EXPECT_FALSE(tc3.Equals(tf1));
+
+  // row-major and non-contiguous
+  EXPECT_TRUE(tc1.Equals(tnc));
+  EXPECT_FALSE(tc3.Equals(tnc));
+
+  // column-major and non-contiguous
+  EXPECT_TRUE(tf1.Equals(tnc));
+  EXPECT_FALSE(tf2.Equals(tnc));
+}
+
 TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc
index ee66b2e30d8..70870a74c45 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -45,8 +45,7 @@ namespace arrow {
 
-static void PrintColumn(const Column& col, std::stringstream* ss) {
-  const ChunkedArray& carr = *col.data();
+static void PrintChunkedArray(const ChunkedArray& carr, std::stringstream* ss) {
   for (int i = 0; i < carr.num_chunks(); ++i) {
     auto c1 = carr.chunk(i);
     *ss << "Chunk " << i << std::endl;
@@ -147,17 +146,17 @@ void AssertTablesEqual(const Table& expected, const Table& actual,
 
   if (same_chunk_layout) {
     for (int i = 0; i < actual.num_columns(); ++i) {
-      AssertChunkedEqual(*expected.column(i)->data(), *actual.column(i)->data());
+      AssertChunkedEqual(*expected.column(i), *actual.column(i));
     }
   } else {
     std::stringstream ss;
     if (!actual.Equals(expected)) {
      for (int i = 0; i < expected.num_columns(); ++i) {
         ss << "Actual column " << i << std::endl;
-        PrintColumn(*actual.column(i), &ss);
+        PrintChunkedArray(*actual.column(i), &ss);
         ss << "Expected column " << i << std::endl;
-        PrintColumn(*expected.column(i), &ss);
+        PrintChunkedArray(*expected.column(i), &ss);
       }
       FAIL() << ss.str();
     }
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 914aeb011ca..e7c4bb3db20 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -120,7 +120,6 @@ typedef ::testing::Types
diff --git a/cpp/src/arrow/testing/random.cc b/cpp/src/arrow/testing/random.cc
--- a/cpp/src/arrow/testing/random.cc
+++ b/cpp/src/arrow/testing/random.cc
-std::shared_ptr<Array> RandomArrayGenerator::String(int64_t size,
-                                                    int32_t min_length,
-                                                    int32_t max_length,
-                                                    double null_probability) {
+template <typename TypeClass>
+static std::shared_ptr<Array> GenerateBinaryArray(RandomArrayGenerator* gen,
+                                                  int64_t size, int32_t min_length,
+                                                  int32_t max_length,
+                                                  double null_probability) {
+  using offset_type = typename TypeClass::offset_type;
+  using BuilderType = typename TypeTraits<TypeClass>::BuilderType;
+  using OffsetArrowType = typename CTypeTraits<offset_type>::ArrowType;
+  using OffsetArrayType = typename TypeTraits<OffsetArrowType>::ArrayType;
+
   if (null_probability < 0 || null_probability > 1) {
     ABORT_NOT_OK(Status::Invalid("null_probability must be between 0 and 1"));
   }
-  auto int32_lengths = Int32(size, min_length, max_length, null_probability);
-  auto lengths = std::dynamic_pointer_cast<Int32Array>(int32_lengths);
+  auto lengths = std::dynamic_pointer_cast<OffsetArrayType>(
+      gen->Numeric<OffsetArrowType>(size, min_length, max_length, null_probability));
 
   // Visual Studio does not implement uniform_int_distribution for char types.
   using GenOpt = GenerateOptions<char, std::uniform_int_distribution<unsigned int>>;
-  GenOpt options(seed(), static_cast<char>('A'), static_cast<char>('z'),
+  GenOpt options(gen->seed(), static_cast<char>('A'), static_cast<char>('z'),
                 /*null_probability=*/0);
 
   std::vector<char> str_buffer(max_length);
-  StringBuilder builder;
+  BuilderType builder;
 
   for (int64_t i = 0; i < size; ++i) {
     if (lengths->IsValid(i)) {
@@ -178,6 +184,22 @@ std::shared_ptr<Array> RandomArrayGenerator::String(int64_t size,
   return result;
 }
 
+std::shared_ptr<Array> RandomArrayGenerator::String(int64_t size,
+                                                    int32_t min_length,
+                                                    int32_t max_length,
+                                                    double null_probability) {
+  return GenerateBinaryArray<StringType>(this, size, min_length, max_length,
+                                         null_probability);
+}
+
+std::shared_ptr<Array> RandomArrayGenerator::LargeString(int64_t size,
+                                                         int32_t min_length,
+                                                         int32_t max_length,
+                                                         double null_probability) {
+  return GenerateBinaryArray<LargeStringType>(this, size, min_length, max_length,
+                                              null_probability);
+}
+
 std::shared_ptr<Array> RandomArrayGenerator::StringWithRepeats(
     int64_t size, int64_t unique, int32_t min_length, int32_t max_length,
     double null_probability) {
diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h
index 6b188fd573b..fc8c2d2fbec 100644
--- a/cpp/src/arrow/testing/random.h
+++ b/cpp/src/arrow/testing/random.h
@@ -214,6 +214,19 @@ class ARROW_EXPORT RandomArrayGenerator {
   std::shared_ptr<Array> String(int64_t size, int32_t min_length, int32_t max_length,
                                 double null_probability);
 
+  /// \brief Generates a random LargeStringArray
+  ///
+  /// \param[in] size the size of the array to generate
+  /// \param[in] min_length the lower bound of the string length
+  ///            determined by the uniform distribution
+  /// \param[in] max_length the upper bound of the string length
+  ///            determined by the uniform distribution
+  /// \param[in] null_probability the probability of a row being null
+  ///
+  /// \return a generated Array
+  std::shared_ptr<Array> LargeString(int64_t size, int32_t min_length,
+                                     int32_t max_length, double null_probability);
+
   /// \brief Generates a random StringArray with repeated values
   ///
   /// \param[in] size the size of the array to generate
@@ -230,9 +243,9 @@ class ARROW_EXPORT RandomArrayGenerator {
                                            int32_t min_length, int32_t max_length,
                                            double null_probability);
 
- private:
   SeedType seed() { return seed_distribution_(seed_rng_); }
 
+ private:
   std::uniform_int_distribution<SeedType> seed_distribution_;
   std::default_random_engine seed_rng_;
 };
diff --git a/cpp/src/arrow/testing/util.h b/cpp/src/arrow/testing/util.h
index d12f57e3b7e..0c67bca6aaf 100644
--- a/cpp/src/arrow/testing/util.h
+++ b/cpp/src/arrow/testing/util.h
@@ -38,7 +38,6 @@ namespace arrow {
 
 class Array;
 class ChunkedArray;
-class Column;
 class MemoryPool;
 class RecordBatch;
 class Table;
diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 7ad1d8ad05d..7bfb7200171 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -354,6 +354,20 @@ TEST(TestStringType, ToString) {
   ASSERT_EQ(str.ToString(), std::string("string"));
 }
 
+TEST(TestLargeBinaryTypes, ToString) {
+  BinaryType bt1;
+  LargeBinaryType t1;
+  LargeBinaryType e1;
+  LargeStringType t2;
+  EXPECT_TRUE(t1.Equals(e1));
+  EXPECT_FALSE(t1.Equals(t2));
+  EXPECT_FALSE(t1.Equals(bt1));
+  ASSERT_EQ(t1.id(), Type::LARGE_BINARY);
+  ASSERT_EQ(t1.ToString(), std::string("large_binary"));
+  ASSERT_EQ(t2.id(), Type::LARGE_STRING);
+  ASSERT_EQ(t2.ToString(), std::string("large_string"));
+}
+
 TEST(TestFixedSizeBinaryType, ToString) {
   auto t = fixed_size_binary(10);
   ASSERT_EQ(t->id(), Type::FIXED_SIZE_BINARY);
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 2dbd31a7dca..4397bf29722 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -143,8 +143,6 @@ FloatingPointType::Precision DoubleType::precision() const {
   return FloatingPointType::DOUBLE;
 }
 
-std::string StringType::ToString() const { return std::string("string"); }
-
 std::string ListType::ToString() const {
   std::stringstream s;
   s << "list<" << value_field()->ToString() << ">";
@@ -178,7 +176,13 @@ std::string FixedSizeListType::ToString() const {
   return s.str();
 }
 
-std::string BinaryType::ToString() const { return std::string("binary"); }
+std::string BinaryType::ToString() const { return "binary"; }
+
+std::string LargeBinaryType::ToString() const { return "large_binary"; }
+
+std::string StringType::ToString() const { return "string"; }
+
+std::string LargeStringType::ToString() const { return "large_string"; }
 
 int FixedSizeBinaryType::bit_width() const { return CHAR_BIT * byte_width(); }
 
@@ -273,6 +277,9 @@ std::string DurationType::ToString() const {
 
 UnionType::UnionType(const std::vector<std::shared_ptr<Field>>& fields,
                      const std::vector<uint8_t>& type_codes, UnionMode::type mode)
     : NestedType(Type::UNION), mode_(mode), type_codes_(type_codes) {
+  DCHECK_LE(fields.size(), type_codes.size()) << "union field with unknown type id";
+  DCHECK_GE(fields.size(), type_codes.size())
+      << "type id provided without corresponding union field";
   children_ = fields;
 }
 
@@ -284,6 +291,12 @@ DataTypeLayout UnionType::layout() const {
   }
 }
 
+uint8_t UnionType::max_type_code() const {
+  return type_codes_.size() == 0
+             ? 0
+             : *std::max_element(type_codes_.begin(), type_codes_.end());
+}
+
 std::string UnionType::ToString() const {
   std::stringstream s;
@@ -358,7 +371,7 @@ std::string StructType::ToString() const {
       s << ", ";
     }
     std::shared_ptr<Field> field = this->child(i);
-    s << field->name() << ": " << field->type()->ToString();
+    s << field->ToString();
   }
   s << ">";
   return s.str();
@@ -658,7 +671,9 @@ TYPE_FACTORY(float16, HalfFloatType)
 TYPE_FACTORY(float32, FloatType)
 TYPE_FACTORY(float64, DoubleType)
 TYPE_FACTORY(utf8, StringType)
+TYPE_FACTORY(large_utf8, LargeStringType)
 TYPE_FACTORY(binary, BinaryType)
+TYPE_FACTORY(large_binary, LargeBinaryType)
 TYPE_FACTORY(date64, Date64Type)
 TYPE_FACTORY(date32, Date32Type)
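The two TYPE_FACTORY additions above are the patch's public entry points for the new types. A minimal usage sketch (not part of the patch; it assumes only the factories and classes introduced in this diff):

    #include <cstdint>
    #include <iostream>
    #include <type_traits>
    #include "arrow/type.h"

    int main() {
      // Like utf8() and binary(), the new factories return shared DataType instances.
      auto lb = arrow::large_binary();
      auto ls = arrow::large_utf8();
      std::cout << lb->ToString() << std::endl;  // prints "large_binary"
      std::cout << ls->ToString() << std::endl;  // prints "large_string"
      // The only difference from BINARY/STRING is the 64-bit offset width,
      // which lets a single array address more than 2**31 - 1 bytes of values.
      static_assert(std::is_same<arrow::LargeBinaryType::offset_type, int64_t>::value,
                    "large binary offsets are 64-bit");
      return 0;
    }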
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 16f486f45f1..572b888df11 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -143,7 +143,13 @@ struct Type {
     /// Measure of elapsed time in either seconds, milliseconds, microseconds
     /// or nanoseconds.
-    DURATION
+    DURATION,
+
+    /// Like STRING, but with 64-bit offsets
+    LARGE_STRING,
+
+    /// Like BINARY, but with 64-bit offsets
+    LARGE_BINARY
   };
 };
 
@@ -215,6 +221,7 @@ class ARROW_EXPORT DataType {
   ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
 };
 
+ARROW_EXPORT
 std::ostream& operator<<(std::ostream& os, const DataType& type);
 
 /// \brief Base class for all fixed-width data types
@@ -471,6 +478,7 @@ class ARROW_EXPORT DoubleType
 
 class ARROW_EXPORT ListType : public NestedType {
  public:
   static constexpr Type::type type_id = Type::LIST;
+  using offset_type = int32_t;
 
   // List can contain any other logical value type
   explicit ListType(const std::shared_ptr<DataType>& value_type)
@@ -485,7 +493,7 @@ class ARROW_EXPORT ListType : public NestedType {
   std::shared_ptr<DataType> value_type() const { return children_[0]->type(); }
 
   DataTypeLayout layout() const override {
-    return {{1, CHAR_BIT * sizeof(int32_t)}, false};
+    return {{1, CHAR_BIT * sizeof(offset_type)}, false};
   }
 
   std::string ToString() const override;
@@ -549,23 +557,78 @@ class ARROW_EXPORT FixedSizeListType : public NestedType {
   int32_t list_size_;
 };
 
+/// \brief Base class for all variable-size binary data types
+class ARROW_EXPORT BaseBinaryType : public DataType, public NoExtraMeta {
+ public:
+  using DataType::DataType;
+};
+
 /// \brief Concrete type class for variable-size binary data
-class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
+class ARROW_EXPORT BinaryType : public BaseBinaryType {
  public:
   static constexpr Type::type type_id = Type::BINARY;
+  static constexpr bool is_utf8 = false;
+  using offset_type = int32_t;
 
   BinaryType() : BinaryType(Type::BINARY) {}
 
   DataTypeLayout layout() const override {
-    return {{1, CHAR_BIT * sizeof(int32_t), DataTypeLayout::kVariableSizeBuffer}, false};
+    return {{1, CHAR_BIT * sizeof(offset_type), DataTypeLayout::kVariableSizeBuffer},
+            false};
   }
 
   std::string ToString() const override;
   std::string name() const override { return "binary"; }
 
  protected:
-  // Allow subclasses to change the logical type.
-  explicit BinaryType(Type::type logical_type) : DataType(logical_type) {}
+  // Allow subclasses like StringType to change the logical type.
+  explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
+};
+
+/// \brief Concrete type class for large variable-size binary data
+class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
+ public:
+  static constexpr Type::type type_id = Type::LARGE_BINARY;
+  static constexpr bool is_utf8 = false;
+  using offset_type = int64_t;
+
+  LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}
+
+  DataTypeLayout layout() const override {
+    return {{1, CHAR_BIT * sizeof(offset_type), DataTypeLayout::kVariableSizeBuffer},
+            false};
+  }
+
+  std::string ToString() const override;
+  std::string name() const override { return "large_binary"; }
+
+ protected:
+  // Allow subclasses like LargeStringType to change the logical type.
+  explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
+};
+
+/// \brief Concrete type class for variable-size string data, utf8-encoded
+class ARROW_EXPORT StringType : public BinaryType {
+ public:
+  static constexpr Type::type type_id = Type::STRING;
+  static constexpr bool is_utf8 = true;
+
+  StringType() : BinaryType(Type::STRING) {}
+
+  std::string ToString() const override;
+  std::string name() const override { return "utf8"; }
+};
+
+/// \brief Concrete type class for large variable-size string data, utf8-encoded
+class ARROW_EXPORT LargeStringType : public LargeBinaryType {
+ public:
+  static constexpr Type::type type_id = Type::LARGE_STRING;
+  static constexpr bool is_utf8 = true;
+
+  LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}
+
+  std::string ToString() const override;
+  std::string name() const override { return "large_utf8"; }
 };
 
 /// \brief Concrete type class for fixed-size binary data
@@ -590,17 +653,6 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType
   int32_t byte_width_;
 };
 
-/// \brief Concrete type class for variable-size string data, utf8-encoded
-class ARROW_EXPORT StringType : public BinaryType {
- public:
-  static constexpr Type::type type_id = Type::STRING;
-
-  StringType() : BinaryType(Type::STRING) {}
-
-  std::string ToString() const override;
-  std::string name() const override { return "utf8"; }
-};
-
 /// \brief Concrete type class for struct data
 class ARROW_EXPORT StructType : public NestedType {
  public:
@@ -686,6 +738,8 @@ class ARROW_EXPORT UnionType : public NestedType {
 
   const std::vector<uint8_t>& type_codes() const { return type_codes_; }
 
+  uint8_t max_type_code() const;
+
   UnionMode::type mode() const { return mode_; }
 
  private:
@@ -760,6 +814,7 @@ struct TimeUnit {
   enum type { SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3 };
 };
 
+ARROW_EXPORT
 std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);
 
 /// Base type class for time data
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index ea32b49d168..9935af5111f 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -36,7 +36,6 @@ class Field;
 class Tensor;
 
 class ChunkedArray;
-class Column;
 class RecordBatch;
 class Table;
 
@@ -66,6 +65,11 @@ class BinaryArray;
 class BinaryBuilder;
 struct BinaryScalar;
 
+class LargeBinaryType;
+class LargeBinaryArray;
+class LargeBinaryBuilder;
+struct LargeBinaryScalar;
+
 class FixedSizeBinaryType;
 class FixedSizeBinaryArray;
 class FixedSizeBinaryBuilder;
@@ -76,6 +80,11 @@ class StringArray;
 class StringBuilder;
 struct StringScalar;
 
+class LargeStringType;
+class LargeStringArray;
+class LargeStringBuilder;
+struct LargeStringScalar;
+
 class ListType;
 class ListArray;
 class ListBuilder;
@@ -219,8 +228,12 @@ std::shared_ptr<DataType> ARROW_EXPORT float32();
 std::shared_ptr<DataType> ARROW_EXPORT float64();
 /// \brief Return a StringType instance
 std::shared_ptr<DataType> ARROW_EXPORT utf8();
+/// \brief Return a LargeStringType instance
+std::shared_ptr<DataType> ARROW_EXPORT large_utf8();
 /// \brief Return a BinaryType instance
 std::shared_ptr<DataType> ARROW_EXPORT binary();
+/// \brief Return a LargeBinaryType instance
+std::shared_ptr<DataType> ARROW_EXPORT large_binary();
 /// \brief Return a Date32Type instance
 std::shared_ptr<DataType> ARROW_EXPORT date32();
 /// \brief Return a Date64Type instance
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 4902f5c6334..50e1e725a1b 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -243,6 +243,15 @@ struct TypeTraits<BinaryType> {
   static inline std::shared_ptr<DataType> type_singleton() { return binary(); }
 };
 
+template <>
+struct TypeTraits<LargeBinaryType> {
+  using ArrayType = LargeBinaryArray;
+  using BuilderType = LargeBinaryBuilder;
+  using ScalarType = LargeBinaryScalar;
+  constexpr static bool is_parameter_free = true;
+  static inline std::shared_ptr<DataType> type_singleton() { return large_binary(); }
+};
+
 template <>
 struct TypeTraits<FixedSizeBinaryType> {
   using ArrayType = FixedSizeBinaryArray;
@@ -260,6 +269,15 @@ struct TypeTraits<StringType> {
   static inline std::shared_ptr<DataType> type_singleton() { return utf8(); }
 };
 
+template <>
+struct TypeTraits<LargeStringType> {
+  using ArrayType = LargeStringArray;
+  using BuilderType = LargeStringBuilder;
+  using ScalarType = LargeStringScalar;
+  constexpr static bool is_parameter_free = true;
+  static inline std::shared_ptr<DataType> type_singleton() { return large_utf8(); }
+};
+
 template <>
 struct CTypeTraits<std::string> : public TypeTraits<StringType> {
   using ArrowType = StringType;
@@ -361,6 +379,12 @@ struct is_8bit_int {
   static constexpr bool value =
       (std::is_same<UInt8Type, T>::value || std::is_same<Int8Type, T>::value);
 };
 
+template <typename T>
+struct is_any_string_type {
+  static constexpr bool value =
+      std::is_same<StringType, T>::value || std::is_same<LargeStringType, T>::value;
+};
+
 template <typename T, typename R = void>
 using enable_if_8bit_int = typename std::enable_if<is_8bit_int<T>::value, R>::type;
@@ -412,10 +436,18 @@ using enable_if_has_c_type = typename std::enable_if<has_c_type<T>::value, R>::type
 
 template <typename T, typename R = void>
 using enable_if_null = typename std::enable_if<std::is_same<NullType, T>::value, R>::type;
 
+template <typename T, typename R = void>
+using enable_if_base_binary =
+    typename std::enable_if<std::is_base_of<BaseBinaryType, T>::value, R>::type;
+
 template <typename T, typename R = void>
 using enable_if_binary =
     typename std::enable_if<std::is_base_of<BinaryType, T>::value, R>::type;
 
+template <typename T, typename R = void>
+using enable_if_large_binary =
+    typename std::enable_if<std::is_base_of<LargeBinaryType, T>::value, R>::type;
+
 template <typename T, typename R = void>
 using enable_if_boolean = typename std::enable_if<std::is_same<BooleanType, T>::value, R>::type;
@@ -574,6 +606,17 @@ static inline bool is_binary_like(Type::type type_id) {
   return false;
 }
 
+static inline bool is_large_binary_like(Type::type type_id) {
+  switch (type_id) {
+    case Type::LARGE_BINARY:
+    case Type::LARGE_STRING:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
+
 static inline bool is_dictionary(Type::type type_id) {
   return type_id == Type::DICTIONARY;
 }
diff --git a/r/src/column.cpp b/cpp/src/arrow/util/atomic_shared_ptr.h
similarity index 51%
rename from r/src/column.cpp
rename to cpp/src/arrow/util/atomic_shared_ptr.h
index 026cb6904d4..9f3152bafd5 100644
--- a/r/src/column.cpp
+++ b/cpp/src/arrow/util/atomic_shared_ptr.h
@@ -15,30 +15,43 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "./arrow_types.h"
+#pragma once
 
-#if defined(ARROW_R_WITH_ARROW)
+#include <atomic>
+#include <memory>
+#include <utility>
 
-// [[arrow::export]]
-int Column__length(const std::shared_ptr<arrow::Column>& column) {
-  return column->length();
+namespace arrow {
+namespace internal {
+
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 5
+
+// atomic shared_ptr operations only appeared in gcc 5,
+// emulate them with unsafe ops on gcc 4.x.
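The load/store helpers this new header defines (below) exist so that a lazily initialized shared_ptr member can be published to, and read by, concurrent threads without a data race. A sketch of the intended usage pattern, with CachedValue and Box as illustrative names only, not Arrow API:

    #include <memory>
    #include <mutex>
    #include "arrow/util/atomic_shared_ptr.h"

    struct Box {
      int value;
    };

    class CachedValue {
     public:
      std::shared_ptr<Box> Get() {
        // Lock-free fast path: a plain read of cache_ here would race with the
        // store below; atomic_load makes the concurrent read well-defined.
        auto result = arrow::internal::atomic_load(&cache_);
        if (!result) {
          std::lock_guard<std::mutex> lock(mutex_);
          result = arrow::internal::atomic_load(&cache_);
          if (!result) {
            result = std::make_shared<Box>(Box{42});
            arrow::internal::atomic_store(&cache_, result);  // publish once
          }
        }
        return result;
      }

     private:
      std::mutex mutex_;
      std::shared_ptr<Box> cache_;
    };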
+ +template +inline std::shared_ptr atomic_load(const std::shared_ptr* p) { + return *p; } -// [[arrow::export]] -int Column__null_count(const std::shared_ptr& column) { - return column->null_count(); +template +inline void atomic_store(std::shared_ptr* p, std::shared_ptr r) { + *p = r; } -// [[arrow::export]] -std::shared_ptr Column__type( - const std::shared_ptr& column) { - return column->type(); +#else + +template +inline std::shared_ptr atomic_load(const std::shared_ptr* p) { + return std::atomic_load(p); } -// [[arrow::export]] -std::shared_ptr Column__data( - const std::shared_ptr& column) { - return column->data(); +template +inline void atomic_store(std::shared_ptr* p, std::shared_ptr r) { + std::atomic_store(p, std::move(r)); } #endif + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bpacking.h b/cpp/src/arrow/util/bpacking.h index 14258cff6e4..98c2e7deaee 100644 --- a/cpp/src/arrow/util/bpacking.h +++ b/cpp/src/arrow/util/bpacking.h @@ -28,74 +28,76 @@ #define ARROW_UTIL_BPACKING_H #include "arrow/util/logging.h" +#include "arrow/util/ubsan.h" namespace arrow { namespace internal { inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) & 1; + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) & 1; out++; - *out = ((*in) >> 1) & 1; + *out = (inl >> 1) & 1; out++; - *out = ((*in) >> 2) & 1; + *out = (inl >> 2) & 1; out++; - *out = ((*in) >> 3) & 1; + *out = (inl >> 3) & 1; out++; - *out = ((*in) >> 4) & 1; + *out = (inl >> 4) & 1; out++; - *out = ((*in) >> 5) & 1; + *out = (inl >> 5) & 1; out++; - *out = ((*in) >> 6) & 1; + *out = (inl >> 6) & 1; out++; - *out = ((*in) >> 7) & 1; + *out = (inl >> 7) & 1; out++; - *out = ((*in) >> 8) & 1; + *out = (inl >> 8) & 1; out++; - *out = ((*in) >> 9) & 1; + *out = (inl >> 9) & 1; out++; - *out = ((*in) >> 10) & 1; + *out = (inl >> 10) & 1; out++; - *out = ((*in) >> 11) & 1; + *out = (inl >> 11) & 1; out++; - *out = ((*in) >> 12) & 1; + *out = (inl >> 12) & 1; out++; - *out = ((*in) >> 13) & 1; + *out = (inl >> 13) & 1; out++; - *out = ((*in) >> 14) & 1; + *out = (inl >> 14) & 1; out++; - *out = ((*in) >> 15) & 1; + *out = (inl >> 15) & 1; out++; - *out = ((*in) >> 16) & 1; + *out = (inl >> 16) & 1; out++; - *out = ((*in) >> 17) & 1; + *out = (inl >> 17) & 1; out++; - *out = ((*in) >> 18) & 1; + *out = (inl >> 18) & 1; out++; - *out = ((*in) >> 19) & 1; + *out = (inl >> 19) & 1; out++; - *out = ((*in) >> 20) & 1; + *out = (inl >> 20) & 1; out++; - *out = ((*in) >> 21) & 1; + *out = (inl >> 21) & 1; out++; - *out = ((*in) >> 22) & 1; + *out = (inl >> 22) & 1; out++; - *out = ((*in) >> 23) & 1; + *out = (inl >> 23) & 1; out++; - *out = ((*in) >> 24) & 1; + *out = (inl >> 24) & 1; out++; - *out = ((*in) >> 25) & 1; + *out = (inl >> 25) & 1; out++; - *out = ((*in) >> 26) & 1; + *out = (inl >> 26) & 1; out++; - *out = ((*in) >> 27) & 1; + *out = (inl >> 27) & 1; out++; - *out = ((*in) >> 28) & 1; + *out = (inl >> 28) & 1; out++; - *out = ((*in) >> 29) & 1; + *out = (inl >> 29) & 1; out++; - *out = ((*in) >> 30) & 1; + *out = (inl >> 30) & 1; out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; out++; @@ -103,70 +105,72 @@ inline const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 2); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 2); out++; - *out = ((*in) >> 2) % (1U << 2); + *out = (inl >> 2) % (1U << 2); out++; - *out = ((*in) >> 4) 
% (1U << 2); + *out = (inl >> 4) % (1U << 2); out++; - *out = ((*in) >> 6) % (1U << 2); + *out = (inl >> 6) % (1U << 2); out++; - *out = ((*in) >> 8) % (1U << 2); + *out = (inl >> 8) % (1U << 2); out++; - *out = ((*in) >> 10) % (1U << 2); + *out = (inl >> 10) % (1U << 2); out++; - *out = ((*in) >> 12) % (1U << 2); + *out = (inl >> 12) % (1U << 2); out++; - *out = ((*in) >> 14) % (1U << 2); + *out = (inl >> 14) % (1U << 2); out++; - *out = ((*in) >> 16) % (1U << 2); + *out = (inl >> 16) % (1U << 2); out++; - *out = ((*in) >> 18) % (1U << 2); + *out = (inl >> 18) % (1U << 2); out++; - *out = ((*in) >> 20) % (1U << 2); + *out = (inl >> 20) % (1U << 2); out++; - *out = ((*in) >> 22) % (1U << 2); + *out = (inl >> 22) % (1U << 2); out++; - *out = ((*in) >> 24) % (1U << 2); + *out = (inl >> 24) % (1U << 2); out++; - *out = ((*in) >> 26) % (1U << 2); + *out = (inl >> 26) % (1U << 2); out++; - *out = ((*in) >> 28) % (1U << 2); + *out = (inl >> 28) % (1U << 2); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 2); + *out = (inl >> 0) % (1U << 2); out++; - *out = ((*in) >> 2) % (1U << 2); + *out = (inl >> 2) % (1U << 2); out++; - *out = ((*in) >> 4) % (1U << 2); + *out = (inl >> 4) % (1U << 2); out++; - *out = ((*in) >> 6) % (1U << 2); + *out = (inl >> 6) % (1U << 2); out++; - *out = ((*in) >> 8) % (1U << 2); + *out = (inl >> 8) % (1U << 2); out++; - *out = ((*in) >> 10) % (1U << 2); + *out = (inl >> 10) % (1U << 2); out++; - *out = ((*in) >> 12) % (1U << 2); + *out = (inl >> 12) % (1U << 2); out++; - *out = ((*in) >> 14) % (1U << 2); + *out = (inl >> 14) % (1U << 2); out++; - *out = ((*in) >> 16) % (1U << 2); + *out = (inl >> 16) % (1U << 2); out++; - *out = ((*in) >> 18) % (1U << 2); + *out = (inl >> 18) % (1U << 2); out++; - *out = ((*in) >> 20) % (1U << 2); + *out = (inl >> 20) % (1U << 2); out++; - *out = ((*in) >> 22) % (1U << 2); + *out = (inl >> 22) % (1U << 2); out++; - *out = ((*in) >> 24) % (1U << 2); + *out = (inl >> 24) % (1U << 2); out++; - *out = ((*in) >> 26) % (1U << 2); + *out = (inl >> 26) % (1U << 2); out++; - *out = ((*in) >> 28) % (1U << 2); + *out = (inl >> 28) % (1U << 2); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; out++; @@ -174,73 +178,76 @@ inline const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 3); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 3); out++; - *out = ((*in) >> 3) % (1U << 3); + *out = (inl >> 3) % (1U << 3); out++; - *out = ((*in) >> 6) % (1U << 3); + *out = (inl >> 6) % (1U << 3); out++; - *out = ((*in) >> 9) % (1U << 3); + *out = (inl >> 9) % (1U << 3); out++; - *out = ((*in) >> 12) % (1U << 3); + *out = (inl >> 12) % (1U << 3); out++; - *out = ((*in) >> 15) % (1U << 3); + *out = (inl >> 15) % (1U << 3); out++; - *out = ((*in) >> 18) % (1U << 3); + *out = (inl >> 18) % (1U << 3); out++; - *out = ((*in) >> 21) % (1U << 3); + *out = (inl >> 21) % (1U << 3); out++; - *out = ((*in) >> 24) % (1U << 3); + *out = (inl >> 24) % (1U << 3); out++; - *out = ((*in) >> 27) % (1U << 3); + *out = (inl >> 27) % (1U << 3); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 1)) << (3 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (3 - 1); out++; - *out = ((*in) >> 1) % (1U << 3); + *out = (inl >> 1) % (1U << 3); out++; - *out = ((*in) >> 4) % (1U << 3); + *out = (inl >> 4) % (1U << 3); out++; - *out 
= ((*in) >> 7) % (1U << 3); + *out = (inl >> 7) % (1U << 3); out++; - *out = ((*in) >> 10) % (1U << 3); + *out = (inl >> 10) % (1U << 3); out++; - *out = ((*in) >> 13) % (1U << 3); + *out = (inl >> 13) % (1U << 3); out++; - *out = ((*in) >> 16) % (1U << 3); + *out = (inl >> 16) % (1U << 3); out++; - *out = ((*in) >> 19) % (1U << 3); + *out = (inl >> 19) % (1U << 3); out++; - *out = ((*in) >> 22) % (1U << 3); + *out = (inl >> 22) % (1U << 3); out++; - *out = ((*in) >> 25) % (1U << 3); + *out = (inl >> 25) % (1U << 3); out++; - *out = ((*in) >> 28) % (1U << 3); + *out = (inl >> 28) % (1U << 3); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 2)) << (3 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (3 - 2); out++; - *out = ((*in) >> 2) % (1U << 3); + *out = (inl >> 2) % (1U << 3); out++; - *out = ((*in) >> 5) % (1U << 3); + *out = (inl >> 5) % (1U << 3); out++; - *out = ((*in) >> 8) % (1U << 3); + *out = (inl >> 8) % (1U << 3); out++; - *out = ((*in) >> 11) % (1U << 3); + *out = (inl >> 11) % (1U << 3); out++; - *out = ((*in) >> 14) % (1U << 3); + *out = (inl >> 14) % (1U << 3); out++; - *out = ((*in) >> 17) % (1U << 3); + *out = (inl >> 17) % (1U << 3); out++; - *out = ((*in) >> 20) % (1U << 3); + *out = (inl >> 20) % (1U << 3); out++; - *out = ((*in) >> 23) % (1U << 3); + *out = (inl >> 23) % (1U << 3); out++; - *out = ((*in) >> 26) % (1U << 3); + *out = (inl >> 26) % (1U << 3); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; out++; @@ -248,72 +255,76 @@ inline const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 4); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 4); out++; - *out = ((*in) >> 4) % (1U << 4); + *out = (inl >> 4) % (1U << 4); out++; - *out = ((*in) >> 8) % (1U << 4); + *out = (inl >> 8) % (1U << 4); out++; - *out = ((*in) >> 12) % (1U << 4); + *out = (inl >> 12) % (1U << 4); out++; - *out = ((*in) >> 16) % (1U << 4); + *out = (inl >> 16) % (1U << 4); out++; - *out = ((*in) >> 20) % (1U << 4); + *out = (inl >> 20) % (1U << 4); out++; - *out = ((*in) >> 24) % (1U << 4); + *out = (inl >> 24) % (1U << 4); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 4); + *out = (inl >> 0) % (1U << 4); out++; - *out = ((*in) >> 4) % (1U << 4); + *out = (inl >> 4) % (1U << 4); out++; - *out = ((*in) >> 8) % (1U << 4); + *out = (inl >> 8) % (1U << 4); out++; - *out = ((*in) >> 12) % (1U << 4); + *out = (inl >> 12) % (1U << 4); out++; - *out = ((*in) >> 16) % (1U << 4); + *out = (inl >> 16) % (1U << 4); out++; - *out = ((*in) >> 20) % (1U << 4); + *out = (inl >> 20) % (1U << 4); out++; - *out = ((*in) >> 24) % (1U << 4); + *out = (inl >> 24) % (1U << 4); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 4); + *out = (inl >> 0) % (1U << 4); out++; - *out = ((*in) >> 4) % (1U << 4); + *out = (inl >> 4) % (1U << 4); out++; - *out = ((*in) >> 8) % (1U << 4); + *out = (inl >> 8) % (1U << 4); out++; - *out = ((*in) >> 12) % (1U << 4); + *out = (inl >> 12) % (1U << 4); out++; - *out = ((*in) >> 16) % (1U << 4); + *out = (inl >> 16) % (1U << 4); out++; - *out = ((*in) >> 20) % (1U << 4); + *out = (inl >> 20) % (1U << 4); out++; - *out = ((*in) >> 24) % (1U << 4); + *out = (inl >> 24) % (1U << 4); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; + 
inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 4); + *out = (inl >> 0) % (1U << 4); out++; - *out = ((*in) >> 4) % (1U << 4); + *out = (inl >> 4) % (1U << 4); out++; - *out = ((*in) >> 8) % (1U << 4); + *out = (inl >> 8) % (1U << 4); out++; - *out = ((*in) >> 12) % (1U << 4); + *out = (inl >> 12) % (1U << 4); out++; - *out = ((*in) >> 16) % (1U << 4); + *out = (inl >> 16) % (1U << 4); out++; - *out = ((*in) >> 20) % (1U << 4); + *out = (inl >> 20) % (1U << 4); out++; - *out = ((*in) >> 24) % (1U << 4); + *out = (inl >> 24) % (1U << 4); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; out++; @@ -321,77 +332,82 @@ inline const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 5); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 5); out++; - *out = ((*in) >> 5) % (1U << 5); + *out = (inl >> 5) % (1U << 5); out++; - *out = ((*in) >> 10) % (1U << 5); + *out = (inl >> 10) % (1U << 5); out++; - *out = ((*in) >> 15) % (1U << 5); + *out = (inl >> 15) % (1U << 5); out++; - *out = ((*in) >> 20) % (1U << 5); + *out = (inl >> 20) % (1U << 5); out++; - *out = ((*in) >> 25) % (1U << 5); + *out = (inl >> 25) % (1U << 5); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 3)) << (5 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (5 - 3); out++; - *out = ((*in) >> 3) % (1U << 5); + *out = (inl >> 3) % (1U << 5); out++; - *out = ((*in) >> 8) % (1U << 5); + *out = (inl >> 8) % (1U << 5); out++; - *out = ((*in) >> 13) % (1U << 5); + *out = (inl >> 13) % (1U << 5); out++; - *out = ((*in) >> 18) % (1U << 5); + *out = (inl >> 18) % (1U << 5); out++; - *out = ((*in) >> 23) % (1U << 5); + *out = (inl >> 23) % (1U << 5); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 1)) << (5 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (5 - 1); out++; - *out = ((*in) >> 1) % (1U << 5); + *out = (inl >> 1) % (1U << 5); out++; - *out = ((*in) >> 6) % (1U << 5); + *out = (inl >> 6) % (1U << 5); out++; - *out = ((*in) >> 11) % (1U << 5); + *out = (inl >> 11) % (1U << 5); out++; - *out = ((*in) >> 16) % (1U << 5); + *out = (inl >> 16) % (1U << 5); out++; - *out = ((*in) >> 21) % (1U << 5); + *out = (inl >> 21) % (1U << 5); out++; - *out = ((*in) >> 26) % (1U << 5); + *out = (inl >> 26) % (1U << 5); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 4)) << (5 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (5 - 4); out++; - *out = ((*in) >> 4) % (1U << 5); + *out = (inl >> 4) % (1U << 5); out++; - *out = ((*in) >> 9) % (1U << 5); + *out = (inl >> 9) % (1U << 5); out++; - *out = ((*in) >> 14) % (1U << 5); + *out = (inl >> 14) % (1U << 5); out++; - *out = ((*in) >> 19) % (1U << 5); + *out = (inl >> 19) % (1U << 5); out++; - *out = ((*in) >> 24) % (1U << 5); + *out = (inl >> 24) % (1U << 5); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 2)) << (5 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (5 - 2); out++; - *out = ((*in) >> 2) % (1U << 5); + *out = (inl >> 2) % (1U << 5); out++; - *out = ((*in) >> 7) % (1U << 5); + *out = (inl >> 7) % (1U << 5); out++; - *out = ((*in) >> 12) % (1U << 5); + *out = (inl >> 12) % (1U << 5); out++; - *out = ((*in) >> 17) % (1U << 5); + *out = (inl >> 17) % (1U << 5); out++; - *out = ((*in) >> 22) % (1U << 5); + *out = (inl >> 22) % (1U << 
5); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; out++; @@ -399,78 +415,84 @@ inline const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 6); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 6); out++; - *out = ((*in) >> 6) % (1U << 6); + *out = (inl >> 6) % (1U << 6); out++; - *out = ((*in) >> 12) % (1U << 6); + *out = (inl >> 12) % (1U << 6); out++; - *out = ((*in) >> 18) % (1U << 6); + *out = (inl >> 18) % (1U << 6); out++; - *out = ((*in) >> 24) % (1U << 6); + *out = (inl >> 24) % (1U << 6); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 4)) << (6 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (6 - 4); out++; - *out = ((*in) >> 4) % (1U << 6); + *out = (inl >> 4) % (1U << 6); out++; - *out = ((*in) >> 10) % (1U << 6); + *out = (inl >> 10) % (1U << 6); out++; - *out = ((*in) >> 16) % (1U << 6); + *out = (inl >> 16) % (1U << 6); out++; - *out = ((*in) >> 22) % (1U << 6); + *out = (inl >> 22) % (1U << 6); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 2)) << (6 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (6 - 2); out++; - *out = ((*in) >> 2) % (1U << 6); + *out = (inl >> 2) % (1U << 6); out++; - *out = ((*in) >> 8) % (1U << 6); + *out = (inl >> 8) % (1U << 6); out++; - *out = ((*in) >> 14) % (1U << 6); + *out = (inl >> 14) % (1U << 6); out++; - *out = ((*in) >> 20) % (1U << 6); + *out = (inl >> 20) % (1U << 6); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 6); + *out = (inl >> 0) % (1U << 6); out++; - *out = ((*in) >> 6) % (1U << 6); + *out = (inl >> 6) % (1U << 6); out++; - *out = ((*in) >> 12) % (1U << 6); + *out = (inl >> 12) % (1U << 6); out++; - *out = ((*in) >> 18) % (1U << 6); + *out = (inl >> 18) % (1U << 6); out++; - *out = ((*in) >> 24) % (1U << 6); + *out = (inl >> 24) % (1U << 6); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 4)) << (6 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (6 - 4); out++; - *out = ((*in) >> 4) % (1U << 6); + *out = (inl >> 4) % (1U << 6); out++; - *out = ((*in) >> 10) % (1U << 6); + *out = (inl >> 10) % (1U << 6); out++; - *out = ((*in) >> 16) % (1U << 6); + *out = (inl >> 16) % (1U << 6); out++; - *out = ((*in) >> 22) % (1U << 6); + *out = (inl >> 22) % (1U << 6); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 2)) << (6 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (6 - 2); out++; - *out = ((*in) >> 2) % (1U << 6); + *out = (inl >> 2) % (1U << 6); out++; - *out = ((*in) >> 8) % (1U << 6); + *out = (inl >> 8) % (1U << 6); out++; - *out = ((*in) >> 14) % (1U << 6); + *out = (inl >> 14) % (1U << 6); out++; - *out = ((*in) >> 20) % (1U << 6); + *out = (inl >> 20) % (1U << 6); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; out++; @@ -478,81 +500,88 @@ inline const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 7); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 7); out++; - *out = ((*in) >> 7) % (1U << 7); + *out = (inl >> 7) % (1U << 7); out++; - *out = ((*in) >> 14) % (1U << 7); + *out = (inl >> 14) % (1U << 7); out++; - *out = ((*in) >> 21) % (1U << 7); + *out = 
(inl >> 21) % (1U << 7); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 3)) << (7 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (7 - 3); out++; - *out = ((*in) >> 3) % (1U << 7); + *out = (inl >> 3) % (1U << 7); out++; - *out = ((*in) >> 10) % (1U << 7); + *out = (inl >> 10) % (1U << 7); out++; - *out = ((*in) >> 17) % (1U << 7); + *out = (inl >> 17) % (1U << 7); out++; - *out = ((*in) >> 24) % (1U << 7); + *out = (inl >> 24) % (1U << 7); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 6)) << (7 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (7 - 6); out++; - *out = ((*in) >> 6) % (1U << 7); + *out = (inl >> 6) % (1U << 7); out++; - *out = ((*in) >> 13) % (1U << 7); + *out = (inl >> 13) % (1U << 7); out++; - *out = ((*in) >> 20) % (1U << 7); + *out = (inl >> 20) % (1U << 7); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 2)) << (7 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (7 - 2); out++; - *out = ((*in) >> 2) % (1U << 7); + *out = (inl >> 2) % (1U << 7); out++; - *out = ((*in) >> 9) % (1U << 7); + *out = (inl >> 9) % (1U << 7); out++; - *out = ((*in) >> 16) % (1U << 7); + *out = (inl >> 16) % (1U << 7); out++; - *out = ((*in) >> 23) % (1U << 7); + *out = (inl >> 23) % (1U << 7); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 5)) << (7 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (7 - 5); out++; - *out = ((*in) >> 5) % (1U << 7); + *out = (inl >> 5) % (1U << 7); out++; - *out = ((*in) >> 12) % (1U << 7); + *out = (inl >> 12) % (1U << 7); out++; - *out = ((*in) >> 19) % (1U << 7); + *out = (inl >> 19) % (1U << 7); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 1)) << (7 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (7 - 1); out++; - *out = ((*in) >> 1) % (1U << 7); + *out = (inl >> 1) % (1U << 7); out++; - *out = ((*in) >> 8) % (1U << 7); + *out = (inl >> 8) % (1U << 7); out++; - *out = ((*in) >> 15) % (1U << 7); + *out = (inl >> 15) % (1U << 7); out++; - *out = ((*in) >> 22) % (1U << 7); + *out = (inl >> 22) % (1U << 7); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 4)) << (7 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (7 - 4); out++; - *out = ((*in) >> 4) % (1U << 7); + *out = (inl >> 4) % (1U << 7); out++; - *out = ((*in) >> 11) % (1U << 7); + *out = (inl >> 11) % (1U << 7); out++; - *out = ((*in) >> 18) % (1U << 7); + *out = (inl >> 18) % (1U << 7); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; out++; @@ -560,76 +589,84 @@ inline const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 8); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) 
>> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 8); + *out = (inl >> 0) % (1U << 8); out++; - *out = ((*in) >> 8) % (1U << 8); + *out = (inl >> 8) % (1U << 8); out++; - *out = ((*in) >> 16) % (1U << 8); + *out = (inl >> 16) % (1U << 8); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; out++; @@ -637,85 +674,94 @@ inline const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 9); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 9); out++; - *out = ((*in) >> 9) % (1U << 9); + *out = (inl >> 9) % (1U << 9); out++; - *out = ((*in) >> 18) % (1U << 9); + *out = (inl >> 18) % (1U << 9); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 4)) << (9 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (9 - 4); out++; - *out = ((*in) >> 4) % (1U << 9); + *out = (inl >> 4) % (1U << 9); out++; - *out = ((*in) >> 13) % (1U << 9); + *out = (inl >> 13) % (1U << 9); out++; - *out = ((*in) >> 22) % (1U << 9); + *out = (inl >> 22) % (1U << 9); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 8)) << (9 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (9 - 8); out++; - *out = ((*in) >> 8) % (1U << 9); + *out = (inl >> 8) % (1U << 9); out++; - *out = ((*in) >> 17) % (1U << 9); + *out = (inl >> 17) % (1U << 9); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 3)) << (9 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (9 - 3); out++; - *out = ((*in) >> 3) % (1U << 9); + *out = (inl >> 3) % (1U << 9); out++; - *out = ((*in) >> 12) % (1U << 9); + *out = (inl >> 12) % (1U << 9); out++; - *out = ((*in) >> 21) % (1U << 9); + *out = (inl >> 21) % (1U << 9); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 7)) << (9 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (9 - 7); 
out++; - *out = ((*in) >> 7) % (1U << 9); + *out = (inl >> 7) % (1U << 9); out++; - *out = ((*in) >> 16) % (1U << 9); + *out = (inl >> 16) % (1U << 9); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 2)) << (9 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (9 - 2); out++; - *out = ((*in) >> 2) % (1U << 9); + *out = (inl >> 2) % (1U << 9); out++; - *out = ((*in) >> 11) % (1U << 9); + *out = (inl >> 11) % (1U << 9); out++; - *out = ((*in) >> 20) % (1U << 9); + *out = (inl >> 20) % (1U << 9); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 6)) << (9 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (9 - 6); out++; - *out = ((*in) >> 6) % (1U << 9); + *out = (inl >> 6) % (1U << 9); out++; - *out = ((*in) >> 15) % (1U << 9); + *out = (inl >> 15) % (1U << 9); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 1)) << (9 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (9 - 1); out++; - *out = ((*in) >> 1) % (1U << 9); + *out = (inl >> 1) % (1U << 9); out++; - *out = ((*in) >> 10) % (1U << 9); + *out = (inl >> 10) % (1U << 9); out++; - *out = ((*in) >> 19) % (1U << 9); + *out = (inl >> 19) % (1U << 9); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 5)) << (9 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (9 - 5); out++; - *out = ((*in) >> 5) % (1U << 9); + *out = (inl >> 5) % (1U << 9); out++; - *out = ((*in) >> 14) % (1U << 9); + *out = (inl >> 14) % (1U << 9); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; out++; @@ -723,86 +769,96 @@ inline const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 10); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 10); out++; - *out = ((*in) >> 10) % (1U << 10); + *out = (inl >> 10) % (1U << 10); out++; - *out = ((*in) >> 20) % (1U << 10); + *out = (inl >> 20) % (1U << 10); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 8)) << (10 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (10 - 8); out++; - *out = ((*in) >> 8) % (1U << 10); + *out = (inl >> 8) % (1U << 10); out++; - *out = ((*in) >> 18) % (1U << 10); + *out = (inl >> 18) % (1U << 10); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 6)) << (10 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (10 - 6); out++; - *out = ((*in) >> 6) % (1U << 10); + *out = (inl >> 6) % (1U << 10); out++; - *out = ((*in) >> 16) % (1U << 10); + *out = (inl >> 16) % (1U << 10); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 4)) << (10 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (10 - 4); out++; - *out = ((*in) >> 4) % (1U << 10); + *out = (inl >> 4) % (1U << 10); out++; - *out = ((*in) >> 14) % (1U << 10); + *out = (inl >> 14) % (1U << 10); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 2)) << (10 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (10 - 2); out++; - *out = ((*in) >> 2) % (1U << 10); + *out = (inl >> 2) % (1U << 10); out++; - *out = ((*in) >> 12) % (1U << 10); + *out = (inl >> 12) % (1U << 10); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 10); + *out = (inl >> 0) % (1U << 
10); out++; - *out = ((*in) >> 10) % (1U << 10); + *out = (inl >> 10) % (1U << 10); out++; - *out = ((*in) >> 20) % (1U << 10); + *out = (inl >> 20) % (1U << 10); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 8)) << (10 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (10 - 8); out++; - *out = ((*in) >> 8) % (1U << 10); + *out = (inl >> 8) % (1U << 10); out++; - *out = ((*in) >> 18) % (1U << 10); + *out = (inl >> 18) % (1U << 10); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 6)) << (10 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (10 - 6); out++; - *out = ((*in) >> 6) % (1U << 10); + *out = (inl >> 6) % (1U << 10); out++; - *out = ((*in) >> 16) % (1U << 10); + *out = (inl >> 16) % (1U << 10); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 4)) << (10 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (10 - 4); out++; - *out = ((*in) >> 4) % (1U << 10); + *out = (inl >> 4) % (1U << 10); out++; - *out = ((*in) >> 14) % (1U << 10); + *out = (inl >> 14) % (1U << 10); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 2)) << (10 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (10 - 2); out++; - *out = ((*in) >> 2) % (1U << 10); + *out = (inl >> 2) % (1U << 10); out++; - *out = ((*in) >> 12) % (1U << 10); + *out = (inl >> 12) % (1U << 10); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; out++; @@ -810,89 +866,100 @@ inline const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 11); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 11); out++; - *out = ((*in) >> 11) % (1U << 11); + *out = (inl >> 11) % (1U << 11); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 1)) << (11 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (11 - 1); out++; - *out = ((*in) >> 1) % (1U << 11); + *out = (inl >> 1) % (1U << 11); out++; - *out = ((*in) >> 12) % (1U << 11); + *out = (inl >> 12) % (1U << 11); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 2)) << (11 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (11 - 2); out++; - *out = ((*in) >> 2) % (1U << 11); + *out = (inl >> 2) % (1U << 11); out++; - *out = ((*in) >> 13) % (1U << 11); + *out = (inl >> 13) % (1U << 11); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 3)) << (11 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (11 - 3); out++; - *out = ((*in) >> 3) % (1U << 11); + *out = (inl >> 3) % (1U << 11); out++; - *out = ((*in) >> 14) % (1U << 11); + *out = (inl >> 14) % (1U << 11); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 4)) << (11 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (11 - 4); out++; - *out = ((*in) >> 4) % (1U << 11); + *out = (inl >> 4) % (1U << 11); out++; - *out = ((*in) >> 15) % (1U << 11); + *out = (inl >> 15) % (1U << 11); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 5)) << (11 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (11 - 5); out++; - *out = ((*in) >> 5) % (1U << 11); + *out = (inl >> 5) % (1U << 11); out++; - *out = ((*in) >> 16) % (1U << 11); + *out = (inl >> 16) % (1U << 11); out++; - *out = ((*in) >> 
27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 6)) << (11 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (11 - 6); out++; - *out = ((*in) >> 6) % (1U << 11); + *out = (inl >> 6) % (1U << 11); out++; - *out = ((*in) >> 17) % (1U << 11); + *out = (inl >> 17) % (1U << 11); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 7)) << (11 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (11 - 7); out++; - *out = ((*in) >> 7) % (1U << 11); + *out = (inl >> 7) % (1U << 11); out++; - *out = ((*in) >> 18) % (1U << 11); + *out = (inl >> 18) % (1U << 11); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 8)) << (11 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (11 - 8); out++; - *out = ((*in) >> 8) % (1U << 11); + *out = (inl >> 8) % (1U << 11); out++; - *out = ((*in) >> 19) % (1U << 11); + *out = (inl >> 19) % (1U << 11); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 9)) << (11 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (11 - 9); out++; - *out = ((*in) >> 9) % (1U << 11); + *out = (inl >> 9) % (1U << 11); out++; - *out = ((*in) >> 20) % (1U << 11); + *out = (inl >> 20) % (1U << 11); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 10)) << (11 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (11 - 10); out++; - *out = ((*in) >> 10) % (1U << 11); + *out = (inl >> 10) % (1U << 11); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; out++; @@ -900,88 +967,100 @@ inline const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 12); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 12); out++; - *out = ((*in) >> 12) % (1U << 12); + *out = (inl >> 12) % (1U << 12); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 4)) << (12 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (12 - 4); out++; - *out = ((*in) >> 4) % (1U << 12); + *out = (inl >> 4) % (1U << 12); out++; - *out = ((*in) >> 16) % (1U << 12); + *out = (inl >> 16) % (1U << 12); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 8)) << (12 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (12 - 8); out++; - *out = ((*in) >> 8) % (1U << 12); + *out = (inl >> 8) % (1U << 12); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 12); + *out = (inl >> 0) % (1U << 12); out++; - *out = ((*in) >> 12) % (1U << 12); + *out = (inl >> 12) % (1U << 12); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 4)) << (12 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (12 - 4); out++; - *out = ((*in) >> 4) % (1U << 12); + *out = (inl >> 4) % (1U << 12); out++; - *out = ((*in) >> 16) % (1U << 12); + *out = (inl >> 16) % (1U << 12); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 8)) << (12 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (12 - 8); out++; - *out = ((*in) >> 8) % (1U << 12); + *out = (inl >> 8) % (1U << 12); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 12); + *out = (inl >> 0) % (1U << 12); out++; - *out = ((*in) >> 12) % (1U << 
12); + *out = (inl >> 12) % (1U << 12); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 4)) << (12 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (12 - 4); out++; - *out = ((*in) >> 4) % (1U << 12); + *out = (inl >> 4) % (1U << 12); out++; - *out = ((*in) >> 16) % (1U << 12); + *out = (inl >> 16) % (1U << 12); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 8)) << (12 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (12 - 8); out++; - *out = ((*in) >> 8) % (1U << 12); + *out = (inl >> 8) % (1U << 12); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 12); + *out = (inl >> 0) % (1U << 12); out++; - *out = ((*in) >> 12) % (1U << 12); + *out = (inl >> 12) % (1U << 12); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 4)) << (12 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (12 - 4); out++; - *out = ((*in) >> 4) % (1U << 12); + *out = (inl >> 4) % (1U << 12); out++; - *out = ((*in) >> 16) % (1U << 12); + *out = (inl >> 16) % (1U << 12); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 8)) << (12 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (12 - 8); out++; - *out = ((*in) >> 8) % (1U << 12); + *out = (inl >> 8) % (1U << 12); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; out++; @@ -989,93 +1068,106 @@ inline const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 13); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 13); out++; - *out = ((*in) >> 13) % (1U << 13); + *out = (inl >> 13) % (1U << 13); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 7)) << (13 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (13 - 7); out++; - *out = ((*in) >> 7) % (1U << 13); + *out = (inl >> 7) % (1U << 13); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 1)) << (13 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (13 - 1); out++; - *out = ((*in) >> 1) % (1U << 13); + *out = (inl >> 1) % (1U << 13); out++; - *out = ((*in) >> 14) % (1U << 13); + *out = (inl >> 14) % (1U << 13); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 8)) << (13 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (13 - 8); out++; - *out = ((*in) >> 8) % (1U << 13); + *out = (inl >> 8) % (1U << 13); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 2)) << (13 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (13 - 2); out++; - *out = ((*in) >> 2) % (1U << 13); + *out = (inl >> 2) % (1U << 13); out++; - *out = ((*in) >> 15) % (1U << 13); + *out = (inl >> 15) % (1U << 13); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 9)) << (13 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (13 - 9); out++; - *out = ((*in) >> 9) % (1U << 13); + *out = (inl >> 9) % (1U << 13); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 3)) << (13 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (13 - 3); out++; - *out = ((*in) >> 3) % (1U << 13); + *out = (inl >> 3) % (1U << 13); out++; - *out = ((*in) >> 16) % (1U << 13); + *out = 
(inl >> 16) % (1U << 13); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 10)) << (13 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (13 - 10); out++; - *out = ((*in) >> 10) % (1U << 13); + *out = (inl >> 10) % (1U << 13); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 4)) << (13 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (13 - 4); out++; - *out = ((*in) >> 4) % (1U << 13); + *out = (inl >> 4) % (1U << 13); out++; - *out = ((*in) >> 17) % (1U << 13); + *out = (inl >> 17) % (1U << 13); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 11)) << (13 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (13 - 11); out++; - *out = ((*in) >> 11) % (1U << 13); + *out = (inl >> 11) % (1U << 13); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 5)) << (13 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (13 - 5); out++; - *out = ((*in) >> 5) % (1U << 13); + *out = (inl >> 5) % (1U << 13); out++; - *out = ((*in) >> 18) % (1U << 13); + *out = (inl >> 18) % (1U << 13); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 12)) << (13 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (13 - 12); out++; - *out = ((*in) >> 12) % (1U << 13); + *out = (inl >> 12) % (1U << 13); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 6)) << (13 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (13 - 6); out++; - *out = ((*in) >> 6) % (1U << 13); + *out = (inl >> 6) % (1U << 13); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; out++; @@ -1083,94 +1175,108 @@ inline const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 14); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 14); out++; - *out = ((*in) >> 14) % (1U << 14); + *out = (inl >> 14) % (1U << 14); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 10)) << (14 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (14 - 10); out++; - *out = ((*in) >> 10) % (1U << 14); + *out = (inl >> 10) % (1U << 14); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 6)) << (14 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (14 - 6); out++; - *out = ((*in) >> 6) % (1U << 14); + *out = (inl >> 6) % (1U << 14); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 2)) << (14 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (14 - 2); out++; - *out = ((*in) >> 2) % (1U << 14); + *out = (inl >> 2) % (1U << 14); out++; - *out = ((*in) >> 16) % (1U << 14); + *out = (inl >> 16) % (1U << 14); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 12)) << (14 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (14 - 12); out++; - *out = ((*in) >> 12) % (1U << 14); + *out = (inl >> 12) % (1U << 14); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 8)) << (14 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (14 - 8); out++; - *out = ((*in) >> 8) % (1U << 14); + *out = (inl >> 8) % (1U << 14); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 4)) << (14 - 4); + inl = 
util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (14 - 4); out++; - *out = ((*in) >> 4) % (1U << 14); + *out = (inl >> 4) % (1U << 14); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 14); + *out = (inl >> 0) % (1U << 14); out++; - *out = ((*in) >> 14) % (1U << 14); + *out = (inl >> 14) % (1U << 14); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 10)) << (14 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (14 - 10); out++; - *out = ((*in) >> 10) % (1U << 14); + *out = (inl >> 10) % (1U << 14); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 6)) << (14 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (14 - 6); out++; - *out = ((*in) >> 6) % (1U << 14); + *out = (inl >> 6) % (1U << 14); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 2)) << (14 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (14 - 2); out++; - *out = ((*in) >> 2) % (1U << 14); + *out = (inl >> 2) % (1U << 14); out++; - *out = ((*in) >> 16) % (1U << 14); + *out = (inl >> 16) % (1U << 14); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 12)) << (14 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (14 - 12); out++; - *out = ((*in) >> 12) % (1U << 14); + *out = (inl >> 12) % (1U << 14); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 8)) << (14 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (14 - 8); out++; - *out = ((*in) >> 8) % (1U << 14); + *out = (inl >> 8) % (1U << 14); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 4)) << (14 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (14 - 4); out++; - *out = ((*in) >> 4) % (1U << 14); + *out = (inl >> 4) % (1U << 14); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; out++; @@ -1178,97 +1284,112 @@ inline const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 15); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 15); out++; - *out = ((*in) >> 15) % (1U << 15); + *out = (inl >> 15) % (1U << 15); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 13)) << (15 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (15 - 13); out++; - *out = ((*in) >> 13) % (1U << 15); + *out = (inl >> 13) % (1U << 15); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 11)) << (15 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (15 - 11); out++; - *out = ((*in) >> 11) % (1U << 15); + *out = (inl >> 11) % (1U << 15); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 9)) << (15 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (15 - 9); out++; - *out = ((*in) >> 9) % (1U << 15); + *out = (inl >> 9) % (1U << 15); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 7)) << (15 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (15 - 7); out++; - *out = ((*in) >> 7) % (1U << 15); + *out = (inl >> 7) % (1U << 15); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 5)) << (15 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (15 - 
5); out++; - *out = ((*in) >> 5) % (1U << 15); + *out = (inl >> 5) % (1U << 15); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 3)) << (15 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (15 - 3); out++; - *out = ((*in) >> 3) % (1U << 15); + *out = (inl >> 3) % (1U << 15); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 1)) << (15 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (15 - 1); out++; - *out = ((*in) >> 1) % (1U << 15); + *out = (inl >> 1) % (1U << 15); out++; - *out = ((*in) >> 16) % (1U << 15); + *out = (inl >> 16) % (1U << 15); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 14)) << (15 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (15 - 14); out++; - *out = ((*in) >> 14) % (1U << 15); + *out = (inl >> 14) % (1U << 15); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 12)) << (15 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (15 - 12); out++; - *out = ((*in) >> 12) % (1U << 15); + *out = (inl >> 12) % (1U << 15); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 10)) << (15 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (15 - 10); out++; - *out = ((*in) >> 10) % (1U << 15); + *out = (inl >> 10) % (1U << 15); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 8)) << (15 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (15 - 8); out++; - *out = ((*in) >> 8) % (1U << 15); + *out = (inl >> 8) % (1U << 15); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 6)) << (15 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (15 - 6); out++; - *out = ((*in) >> 6) % (1U << 15); + *out = (inl >> 6) % (1U << 15); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 4)) << (15 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (15 - 4); out++; - *out = ((*in) >> 4) % (1U << 15); + *out = (inl >> 4) % (1U << 15); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 2)) << (15 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (15 - 2); out++; - *out = ((*in) >> 2) % (1U << 15); + *out = (inl >> 2) % (1U << 15); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; out++; @@ -1276,84 +1397,100 @@ inline const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 16); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl 
>> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 16); + *out = (inl >> 0) % (1U << 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; out++; @@ -1361,101 +1498,118 @@ inline const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 17); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 2)) << (17 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (17 - 2); out++; - *out = ((*in) >> 2) % (1U << 17); + *out = (inl >> 2) % (1U << 17); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 4)) << (17 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (17 - 4); out++; - *out = ((*in) >> 4) % (1U << 17); + *out = (inl >> 4) % (1U << 17); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 6)) << (17 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (17 - 6); out++; - *out = ((*in) >> 6) % (1U << 17); + *out = (inl >> 6) % (1U << 17); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 8)) << (17 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (17 - 8); out++; - *out = ((*in) >> 8) % (1U << 17); + *out = (inl >> 8) % (1U << 17); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 10)) << (17 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (17 - 10); out++; - *out = ((*in) >> 10) % (1U << 17); + *out = (inl >> 10) % (1U << 17); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 12)) << (17 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (17 - 12); out++; - *out = ((*in) >> 12) % (1U << 17); + *out = (inl >> 12) % (1U << 17); out++; - *out = ((*in) >> 29); + *out = (inl >> 
29); ++in; - *out |= ((*in) % (1U << 14)) << (17 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (17 - 14); out++; - *out = ((*in) >> 14) % (1U << 17); + *out = (inl >> 14) % (1U << 17); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 16)) << (17 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (17 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 1)) << (17 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (17 - 1); out++; - *out = ((*in) >> 1) % (1U << 17); + *out = (inl >> 1) % (1U << 17); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 3)) << (17 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (17 - 3); out++; - *out = ((*in) >> 3) % (1U << 17); + *out = (inl >> 3) % (1U << 17); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 5)) << (17 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (17 - 5); out++; - *out = ((*in) >> 5) % (1U << 17); + *out = (inl >> 5) % (1U << 17); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 7)) << (17 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (17 - 7); out++; - *out = ((*in) >> 7) % (1U << 17); + *out = (inl >> 7) % (1U << 17); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 9)) << (17 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (17 - 9); out++; - *out = ((*in) >> 9) % (1U << 17); + *out = (inl >> 9) % (1U << 17); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 11)) << (17 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (17 - 11); out++; - *out = ((*in) >> 11) % (1U << 17); + *out = (inl >> 11) % (1U << 17); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 13)) << (17 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (17 - 13); out++; - *out = ((*in) >> 13) % (1U << 17); + *out = (inl >> 13) % (1U << 17); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 15)) << (17 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (17 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; out++; @@ -1463,102 +1617,120 @@ inline const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 18); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 4)) << (18 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (18 - 4); out++; - *out = ((*in) >> 4) % (1U << 18); + *out = (inl >> 4) % (1U << 18); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 8)) << (18 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (18 - 8); out++; - *out = ((*in) >> 8) % (1U << 18); + *out = (inl >> 8) % (1U << 18); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 12)) << (18 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (18 - 12); out++; - *out = ((*in) >> 12) % (1U << 18); + *out = (inl >> 12) % (1U << 18); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 16)) << (18 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 
16)) << (18 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 2)) << (18 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (18 - 2); out++; - *out = ((*in) >> 2) % (1U << 18); + *out = (inl >> 2) % (1U << 18); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 6)) << (18 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (18 - 6); out++; - *out = ((*in) >> 6) % (1U << 18); + *out = (inl >> 6) % (1U << 18); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 10)) << (18 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (18 - 10); out++; - *out = ((*in) >> 10) % (1U << 18); + *out = (inl >> 10) % (1U << 18); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 14)) << (18 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (18 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 18); + *out = (inl >> 0) % (1U << 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 4)) << (18 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (18 - 4); out++; - *out = ((*in) >> 4) % (1U << 18); + *out = (inl >> 4) % (1U << 18); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 8)) << (18 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (18 - 8); out++; - *out = ((*in) >> 8) % (1U << 18); + *out = (inl >> 8) % (1U << 18); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 12)) << (18 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (18 - 12); out++; - *out = ((*in) >> 12) % (1U << 18); + *out = (inl >> 12) % (1U << 18); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 16)) << (18 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (18 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 2)) << (18 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (18 - 2); out++; - *out = ((*in) >> 2) % (1U << 18); + *out = (inl >> 2) % (1U << 18); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 6)) << (18 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (18 - 6); out++; - *out = ((*in) >> 6) % (1U << 18); + *out = (inl >> 6) % (1U << 18); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 10)) << (18 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (18 - 10); out++; - *out = ((*in) >> 10) % (1U << 18); + *out = (inl >> 10) % (1U << 18); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 14)) << (18 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (18 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; out++; @@ -1566,105 +1738,124 @@ inline const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 19); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 6)) << (19 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (19 - 6); out++; - *out = ((*in) >> 6) % (1U << 19); + *out = (inl >> 6) % (1U << 19); 
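// ---------------------------------------------------------------------------
// [Editor's note] The rewrite running through all of these hunks replaces
// every direct dereference `*in` with `util::SafeLoad(in)` and caches the
// loaded word in the local `inl`. The helper's definition is not shown in
// this patch; judging by its name, the motivation is that dereferencing a
// uint32_t* that is not 4-byte aligned is undefined behavior (and is what
// UBSan reports), whereas a byte-wise copy is defined for any alignment.
// A minimal sketch of such a helper under that assumption — the
// repository's real util::SafeLoad may well differ:

#include <cstdint>
#include <cstring>

namespace util_sketch {  // hypothetical namespace, for illustration only
template <typename T>
inline T SafeLoad(const T* unaligned) {
  T ret;
  // std::memcpy is well-defined for any source alignment and compiles down
  // to a single load on the usual platforms.
  std::memcpy(&ret, unaligned, sizeof(T));
  return ret;
}
}  // namespace util_sketch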
out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 12)) << (19 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (19 - 12); out++; - *out = ((*in) >> 12) % (1U << 19); + *out = (inl >> 12) % (1U << 19); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 18)) << (19 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (19 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 5)) << (19 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (19 - 5); out++; - *out = ((*in) >> 5) % (1U << 19); + *out = (inl >> 5) % (1U << 19); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 11)) << (19 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (19 - 11); out++; - *out = ((*in) >> 11) % (1U << 19); + *out = (inl >> 11) % (1U << 19); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 17)) << (19 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (19 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 4)) << (19 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (19 - 4); out++; - *out = ((*in) >> 4) % (1U << 19); + *out = (inl >> 4) % (1U << 19); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 10)) << (19 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (19 - 10); out++; - *out = ((*in) >> 10) % (1U << 19); + *out = (inl >> 10) % (1U << 19); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 16)) << (19 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (19 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 3)) << (19 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (19 - 3); out++; - *out = ((*in) >> 3) % (1U << 19); + *out = (inl >> 3) % (1U << 19); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 9)) << (19 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (19 - 9); out++; - *out = ((*in) >> 9) % (1U << 19); + *out = (inl >> 9) % (1U << 19); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 15)) << (19 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (19 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 2)) << (19 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (19 - 2); out++; - *out = ((*in) >> 2) % (1U << 19); + *out = (inl >> 2) % (1U << 19); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 8)) << (19 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (19 - 8); out++; - *out = ((*in) >> 8) % (1U << 19); + *out = (inl >> 8) % (1U << 19); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 14)) << (19 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (19 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 1)) << (19 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (19 - 1); out++; - *out = ((*in) >> 1) % (1U << 19); + *out = (inl >> 1) % (1U << 19); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 7)) << (19 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (19 - 7); out++; - 
*out = ((*in) >> 7) % (1U << 19); + *out = (inl >> 7) % (1U << 19); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 13)) << (19 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (19 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; out++; @@ -1672,104 +1863,124 @@ inline const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 20); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 8)) << (20 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (20 - 8); out++; - *out = ((*in) >> 8) % (1U << 20); + *out = (inl >> 8) % (1U << 20); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 16)) << (20 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (20 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 4)) << (20 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (20 - 4); out++; - *out = ((*in) >> 4) % (1U << 20); + *out = (inl >> 4) % (1U << 20); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 12)) << (20 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (20 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 20); + *out = (inl >> 0) % (1U << 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 8)) << (20 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (20 - 8); out++; - *out = ((*in) >> 8) % (1U << 20); + *out = (inl >> 8) % (1U << 20); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 16)) << (20 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (20 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 4)) << (20 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (20 - 4); out++; - *out = ((*in) >> 4) % (1U << 20); + *out = (inl >> 4) % (1U << 20); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 12)) << (20 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (20 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 20); + *out = (inl >> 0) % (1U << 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 8)) << (20 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (20 - 8); out++; - *out = ((*in) >> 8) % (1U << 20); + *out = (inl >> 8) % (1U << 20); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 16)) << (20 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (20 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 4)) << (20 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (20 - 4); out++; - *out = ((*in) >> 4) % (1U << 20); + *out = (inl >> 4) % (1U << 20); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 12)) << (20 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (20 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; + inl = util::SafeLoad(in); out++; - *out 
= ((*in) >> 0) % (1U << 20); + *out = (inl >> 0) % (1U << 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 8)) << (20 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (20 - 8); out++; - *out = ((*in) >> 8) % (1U << 20); + *out = (inl >> 8) % (1U << 20); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 16)) << (20 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (20 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 4)) << (20 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (20 - 4); out++; - *out = ((*in) >> 4) % (1U << 20); + *out = (inl >> 4) % (1U << 20); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 12)) << (20 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (20 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; out++; @@ -1777,109 +1988,130 @@ inline const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 21); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 21); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 10)) << (21 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (21 - 10); out++; - *out = ((*in) >> 10) % (1U << 21); + *out = (inl >> 10) % (1U << 21); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 20)) << (21 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (21 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 9)) << (21 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (21 - 9); out++; - *out = ((*in) >> 9) % (1U << 21); + *out = (inl >> 9) % (1U << 21); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 19)) << (21 - 19); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 19)) << (21 - 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 8)) << (21 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (21 - 8); out++; - *out = ((*in) >> 8) % (1U << 21); + *out = (inl >> 8) % (1U << 21); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 18)) << (21 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (21 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 7)) << (21 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (21 - 7); out++; - *out = ((*in) >> 7) % (1U << 21); + *out = (inl >> 7) % (1U << 21); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 17)) << (21 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (21 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 6)) << (21 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (21 - 6); out++; - *out = ((*in) >> 6) % (1U << 21); + *out = (inl >> 6) % (1U << 21); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 16)) << (21 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (21 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 5)) << (21 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (21 - 5); out++; - 
*out = ((*in) >> 5) % (1U << 21); + *out = (inl >> 5) % (1U << 21); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 15)) << (21 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (21 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 4)) << (21 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (21 - 4); out++; - *out = ((*in) >> 4) % (1U << 21); + *out = (inl >> 4) % (1U << 21); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 14)) << (21 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (21 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 3)) << (21 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (21 - 3); out++; - *out = ((*in) >> 3) % (1U << 21); + *out = (inl >> 3) % (1U << 21); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 13)) << (21 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (21 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; - *out |= ((*in) % (1U << 2)) << (21 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (21 - 2); out++; - *out = ((*in) >> 2) % (1U << 21); + *out = (inl >> 2) % (1U << 21); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 12)) << (21 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (21 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 1)) << (21 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (21 - 1); out++; - *out = ((*in) >> 1) % (1U << 21); + *out = (inl >> 1) % (1U << 21); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 11)) << (21 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (21 - 11); out++; - *out = ((*in) >> 11); + *out = (inl >> 11); ++in; out++; @@ -1887,110 +2119,132 @@ inline const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 22); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 12)) << (22 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (22 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 2)) << (22 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (22 - 2); out++; - *out = ((*in) >> 2) % (1U << 22); + *out = (inl >> 2) % (1U << 22); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 14)) << (22 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (22 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 4)) << (22 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (22 - 4); out++; - *out = ((*in) >> 4) % (1U << 22); + *out = (inl >> 4) % (1U << 22); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 16)) << (22 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (22 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 6)) << (22 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (22 - 6); out++; - *out = ((*in) >> 6) % (1U << 22); + *out = (inl >> 6) % (1U << 22); 
out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 18)) << (22 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (22 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 8)) << (22 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (22 - 8); out++; - *out = ((*in) >> 8) % (1U << 22); + *out = (inl >> 8) % (1U << 22); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 20)) << (22 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (22 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 10)) << (22 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (22 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 22); + *out = (inl >> 0) % (1U << 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 12)) << (22 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (22 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 2)) << (22 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (22 - 2); out++; - *out = ((*in) >> 2) % (1U << 22); + *out = (inl >> 2) % (1U << 22); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 14)) << (22 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (22 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 4)) << (22 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (22 - 4); out++; - *out = ((*in) >> 4) % (1U << 22); + *out = (inl >> 4) % (1U << 22); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 16)) << (22 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (22 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 6)) << (22 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (22 - 6); out++; - *out = ((*in) >> 6) % (1U << 22); + *out = (inl >> 6) % (1U << 22); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 18)) << (22 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (22 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 8)) << (22 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (22 - 8); out++; - *out = ((*in) >> 8) % (1U << 22); + *out = (inl >> 8) % (1U << 22); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 20)) << (22 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (22 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 10)) << (22 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (22 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; out++; @@ -1998,113 +2252,136 @@ inline const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 23); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 23); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 14)) << (23 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (23 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); 
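// ---------------------------------------------------------------------------
// [Editor's note] Worked example of a word-boundary crossing, using the bit
// width b = 23 decoded just below this point: a packed value has its low
// 18 bits in the top of word k and its remaining 5 bits in the bottom of
// word k+1, so it is assembled in two steps:
//
//   *out  = inl >> 14;                       // low 18 bits from word k
//   ++in;
//   inl   = util::SafeLoad(in);              // advance to word k+1
//   *out |= (inl % (1U << 5)) << (23 - 5);   // high 5 bits from word k+1
//
// `inl % (1U << r)` keeps the low r bits (equivalent to
// `inl & ((1U << r) - 1)`; compilers lower the power-of-two modulo to an
// AND), and the shift by (b - r) = 18 places those bits above the 18
// already written.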
++in; - *out |= ((*in) % (1U << 5)) << (23 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (23 - 5); out++; - *out = ((*in) >> 5) % (1U << 23); + *out = (inl >> 5) % (1U << 23); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 19)) << (23 - 19); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 19)) << (23 - 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 10)) << (23 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (23 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 1)) << (23 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (23 - 1); out++; - *out = ((*in) >> 1) % (1U << 23); + *out = (inl >> 1) % (1U << 23); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 15)) << (23 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (23 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 6)) << (23 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (23 - 6); out++; - *out = ((*in) >> 6) % (1U << 23); + *out = (inl >> 6) % (1U << 23); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 20)) << (23 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (23 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 11)) << (23 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (23 - 11); out++; - *out = ((*in) >> 11); + *out = (inl >> 11); ++in; - *out |= ((*in) % (1U << 2)) << (23 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (23 - 2); out++; - *out = ((*in) >> 2) % (1U << 23); + *out = (inl >> 2) % (1U << 23); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 16)) << (23 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (23 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 7)) << (23 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (23 - 7); out++; - *out = ((*in) >> 7) % (1U << 23); + *out = (inl >> 7) % (1U << 23); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 21)) << (23 - 21); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 21)) << (23 - 21); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 12)) << (23 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (23 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 3)) << (23 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (23 - 3); out++; - *out = ((*in) >> 3) % (1U << 23); + *out = (inl >> 3) % (1U << 23); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 17)) << (23 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (23 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 8)) << (23 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (23 - 8); out++; - *out = ((*in) >> 8) % (1U << 23); + *out = (inl >> 8) % (1U << 23); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 22)) << (23 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (23 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 13)) << (23 - 13); + inl = 
util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (23 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; - *out |= ((*in) % (1U << 4)) << (23 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (23 - 4); out++; - *out = ((*in) >> 4) % (1U << 23); + *out = (inl >> 4) % (1U << 23); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 18)) << (23 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (23 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 9)) << (23 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (23 - 9); out++; - *out = ((*in) >> 9); + *out = (inl >> 9); ++in; out++; @@ -2112,108 +2389,132 @@ inline const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 24); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out 
= ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 24); + *out = (inl >> 0) % (1U << 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 16)) << (24 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (24 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 8)) << (24 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (24 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; out++; @@ -2221,117 +2522,142 @@ inline const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 25); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 25); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 18)) << (25 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (25 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 11)) << (25 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (25 - 11); out++; - *out = ((*in) >> 11); + *out = (inl >> 11); ++in; - *out |= ((*in) % (1U << 4)) << (25 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (25 - 4); out++; - *out = ((*in) >> 4) % (1U << 25); + *out = (inl >> 4) % (1U << 25); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 22)) << (25 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (25 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 15)) << (25 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (25 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 8)) << (25 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (25 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 1)) << (25 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (25 - 1); out++; - *out = ((*in) >> 1) % (1U << 25); + *out = (inl >> 1) % (1U << 25); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 19)) << (25 - 19); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 19)) << (25 - 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 12)) << (25 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (25 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 5)) << (25 - 5); + inl = util::SafeLoad(in); + *out |= (inl % 
(1U << 5)) << (25 - 5); out++; - *out = ((*in) >> 5) % (1U << 25); + *out = (inl >> 5) % (1U << 25); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 23)) << (25 - 23); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 23)) << (25 - 23); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 16)) << (25 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (25 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 9)) << (25 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (25 - 9); out++; - *out = ((*in) >> 9); + *out = (inl >> 9); ++in; - *out |= ((*in) % (1U << 2)) << (25 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (25 - 2); out++; - *out = ((*in) >> 2) % (1U << 25); + *out = (inl >> 2) % (1U << 25); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 20)) << (25 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (25 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 13)) << (25 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (25 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; - *out |= ((*in) % (1U << 6)) << (25 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (25 - 6); out++; - *out = ((*in) >> 6) % (1U << 25); + *out = (inl >> 6) % (1U << 25); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 24)) << (25 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (25 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 17)) << (25 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (25 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 10)) << (25 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (25 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 3)) << (25 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (25 - 3); out++; - *out = ((*in) >> 3) % (1U << 25); + *out = (inl >> 3) % (1U << 25); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 21)) << (25 - 21); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 21)) << (25 - 21); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 14)) << (25 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (25 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 7)) << (25 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (25 - 7); out++; - *out = ((*in) >> 7); + *out = (inl >> 7); ++in; out++; @@ -2339,118 +2665,144 @@ inline const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 26); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 20)) << (26 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (26 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 14)) << (26 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (26 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 8)) << (26 - 8); 
+ inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (26 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 2)) << (26 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (26 - 2); out++; - *out = ((*in) >> 2) % (1U << 26); + *out = (inl >> 2) % (1U << 26); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 22)) << (26 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (26 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 16)) << (26 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (26 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 10)) << (26 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (26 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 4)) << (26 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (26 - 4); out++; - *out = ((*in) >> 4) % (1U << 26); + *out = (inl >> 4) % (1U << 26); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 24)) << (26 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (26 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 18)) << (26 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (26 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 12)) << (26 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (26 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 6)) << (26 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (26 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 26); + *out = (inl >> 0) % (1U << 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 20)) << (26 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (26 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 14)) << (26 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (26 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 8)) << (26 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (26 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 2)) << (26 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (26 - 2); out++; - *out = ((*in) >> 2) % (1U << 26); + *out = (inl >> 2) % (1U << 26); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 22)) << (26 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (26 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 16)) << (26 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (26 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 10)) << (26 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (26 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 4)) << (26 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (26 - 4); out++; - *out = ((*in) >> 4) % (1U << 26); + *out = (inl >> 4) % (1U << 26); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= 
((*in) % (1U << 24)) << (26 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (26 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 18)) << (26 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (26 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 12)) << (26 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (26 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 6)) << (26 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (26 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; out++; @@ -2458,121 +2810,148 @@ inline const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 27); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 27); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 22)) << (27 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (27 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 17)) << (27 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (27 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 12)) << (27 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (27 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 7)) << (27 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (27 - 7); out++; - *out = ((*in) >> 7); + *out = (inl >> 7); ++in; - *out |= ((*in) % (1U << 2)) << (27 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (27 - 2); out++; - *out = ((*in) >> 2) % (1U << 27); + *out = (inl >> 2) % (1U << 27); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 24)) << (27 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (27 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 19)) << (27 - 19); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 19)) << (27 - 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 14)) << (27 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (27 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 9)) << (27 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (27 - 9); out++; - *out = ((*in) >> 9); + *out = (inl >> 9); ++in; - *out |= ((*in) % (1U << 4)) << (27 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (27 - 4); out++; - *out = ((*in) >> 4) % (1U << 27); + *out = (inl >> 4) % (1U << 27); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 26)) << (27 - 26); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 26)) << (27 - 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 21)) << (27 - 21); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 21)) << (27 - 21); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 16)) << (27 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (27 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 11)) << (27 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (27 - 11); 
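// ---------------------------------------------------------------------------
// [Editor's note] Besides alignment safety, caching the current word in
// `inl` means each input word is read exactly once per function rather than
// re-dereferenced for every output value that draws bits from it. A compiler
// would normally hoist the repeated `*in` loads itself; the main effect of
// the rewrite is structural: no unchecked `*in` access remains anywhere in
// these kernels, so every read goes through util::SafeLoad.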
out++; - *out = ((*in) >> 11); + *out = (inl >> 11); ++in; - *out |= ((*in) % (1U << 6)) << (27 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (27 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; - *out |= ((*in) % (1U << 1)) << (27 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (27 - 1); out++; - *out = ((*in) >> 1) % (1U << 27); + *out = (inl >> 1) % (1U << 27); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 23)) << (27 - 23); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 23)) << (27 - 23); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 18)) << (27 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (27 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 13)) << (27 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (27 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; - *out |= ((*in) % (1U << 8)) << (27 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (27 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 3)) << (27 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (27 - 3); out++; - *out = ((*in) >> 3) % (1U << 27); + *out = (inl >> 3) % (1U << 27); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 25)) << (27 - 25); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 25)) << (27 - 25); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 20)) << (27 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (27 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 15)) << (27 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (27 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 10)) << (27 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (27 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 5)) << (27 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (27 - 5); out++; - *out = ((*in) >> 5); + *out = (inl >> 5); ++in; out++; @@ -2580,120 +2959,148 @@ inline const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 28); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 24)) << (28 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (28 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 20)) << (28 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (28 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 16)) << (28 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (28 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 12)) << (28 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (28 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 8)) << (28 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (28 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 4)) << (28 - 4); + inl = 
util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (28 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 28); + *out = (inl >> 0) % (1U << 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 24)) << (28 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (28 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 20)) << (28 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (28 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 16)) << (28 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (28 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 12)) << (28 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (28 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 8)) << (28 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (28 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 4)) << (28 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (28 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 28); + *out = (inl >> 0) % (1U << 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 24)) << (28 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (28 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 20)) << (28 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (28 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 16)) << (28 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (28 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 12)) << (28 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (28 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 8)) << (28 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (28 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 4)) << (28 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (28 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 28); + *out = (inl >> 0) % (1U << 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 24)) << (28 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (28 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 20)) << (28 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (28 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 16)) << (28 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (28 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 12)) << (28 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (28 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 8)) << (28 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (28 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; 
- *out |= ((*in) % (1U << 4)) << (28 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (28 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; out++; @@ -2701,125 +3108,154 @@ inline const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 29); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 29); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 26)) << (29 - 26); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 26)) << (29 - 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 23)) << (29 - 23); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 23)) << (29 - 23); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 20)) << (29 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (29 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 17)) << (29 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (29 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 14)) << (29 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (29 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 11)) << (29 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (29 - 11); out++; - *out = ((*in) >> 11); + *out = (inl >> 11); ++in; - *out |= ((*in) % (1U << 8)) << (29 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (29 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 5)) << (29 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (29 - 5); out++; - *out = ((*in) >> 5); + *out = (inl >> 5); ++in; - *out |= ((*in) % (1U << 2)) << (29 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (29 - 2); out++; - *out = ((*in) >> 2) % (1U << 29); + *out = (inl >> 2) % (1U << 29); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 28)) << (29 - 28); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 28)) << (29 - 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 25)) << (29 - 25); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 25)) << (29 - 25); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 22)) << (29 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (29 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 19)) << (29 - 19); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 19)) << (29 - 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 16)) << (29 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (29 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 13)) << (29 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (29 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; - *out |= ((*in) % (1U << 10)) << (29 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (29 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 7)) << (29 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (29 - 7); out++; - *out = ((*in) >> 7); + *out = (inl >> 7); ++in; - *out |= 
((*in) % (1U << 4)) << (29 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (29 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; - *out |= ((*in) % (1U << 1)) << (29 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (29 - 1); out++; - *out = ((*in) >> 1) % (1U << 29); + *out = (inl >> 1) % (1U << 29); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 27)) << (29 - 27); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 27)) << (29 - 27); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 24)) << (29 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (29 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 21)) << (29 - 21); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 21)) << (29 - 21); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 18)) << (29 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (29 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 15)) << (29 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (29 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 12)) << (29 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (29 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 9)) << (29 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (29 - 9); out++; - *out = ((*in) >> 9); + *out = (inl >> 9); ++in; - *out |= ((*in) % (1U << 6)) << (29 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (29 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; - *out |= ((*in) % (1U << 3)) << (29 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (29 - 3); out++; - *out = ((*in) >> 3); + *out = (inl >> 3); ++in; out++; @@ -2827,126 +3263,156 @@ inline const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 30); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 30); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 28)) << (30 - 28); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 28)) << (30 - 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 26)) << (30 - 26); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 26)) << (30 - 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 24)) << (30 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (30 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 22)) << (30 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (30 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 20)) << (30 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (30 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 18)) << (30 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (30 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 16)) << (30 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (30 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % 
(1U << 14)) << (30 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (30 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 12)) << (30 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (30 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 10)) << (30 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (30 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 8)) << (30 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (30 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 6)) << (30 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (30 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; - *out |= ((*in) % (1U << 4)) << (30 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (30 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; - *out |= ((*in) % (1U << 2)) << (30 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (30 - 2); out++; - *out = ((*in) >> 2); + *out = (inl >> 2); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0) % (1U << 30); + *out = (inl >> 0) % (1U << 30); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 28)) << (30 - 28); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 28)) << (30 - 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 26)) << (30 - 26); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 26)) << (30 - 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 24)) << (30 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (30 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 22)) << (30 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (30 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 20)) << (30 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (30 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 18)) << (30 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (30 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 16)) << (30 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (30 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 14)) << (30 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (30 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) % (1U << 12)) << (30 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (30 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 10)) << (30 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (30 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 8)) << (30 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (30 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 6)) << (30 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (30 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; - *out |= ((*in) % (1U << 4)) << (30 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (30 - 4); out++; - 
*out = ((*in) >> 4); + *out = (inl >> 4); ++in; - *out |= ((*in) % (1U << 2)) << (30 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (30 - 2); out++; - *out = ((*in) >> 2); + *out = (inl >> 2); ++in; out++; @@ -2954,129 +3420,160 @@ inline const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0) % (1U << 31); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0) % (1U << 31); out++; - *out = ((*in) >> 31); + *out = (inl >> 31); ++in; - *out |= ((*in) % (1U << 30)) << (31 - 30); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 30)) << (31 - 30); out++; - *out = ((*in) >> 30); + *out = (inl >> 30); ++in; - *out |= ((*in) % (1U << 29)) << (31 - 29); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 29)) << (31 - 29); out++; - *out = ((*in) >> 29); + *out = (inl >> 29); ++in; - *out |= ((*in) % (1U << 28)) << (31 - 28); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 28)) << (31 - 28); out++; - *out = ((*in) >> 28); + *out = (inl >> 28); ++in; - *out |= ((*in) % (1U << 27)) << (31 - 27); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 27)) << (31 - 27); out++; - *out = ((*in) >> 27); + *out = (inl >> 27); ++in; - *out |= ((*in) % (1U << 26)) << (31 - 26); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 26)) << (31 - 26); out++; - *out = ((*in) >> 26); + *out = (inl >> 26); ++in; - *out |= ((*in) % (1U << 25)) << (31 - 25); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 25)) << (31 - 25); out++; - *out = ((*in) >> 25); + *out = (inl >> 25); ++in; - *out |= ((*in) % (1U << 24)) << (31 - 24); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 24)) << (31 - 24); out++; - *out = ((*in) >> 24); + *out = (inl >> 24); ++in; - *out |= ((*in) % (1U << 23)) << (31 - 23); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 23)) << (31 - 23); out++; - *out = ((*in) >> 23); + *out = (inl >> 23); ++in; - *out |= ((*in) % (1U << 22)) << (31 - 22); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 22)) << (31 - 22); out++; - *out = ((*in) >> 22); + *out = (inl >> 22); ++in; - *out |= ((*in) % (1U << 21)) << (31 - 21); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 21)) << (31 - 21); out++; - *out = ((*in) >> 21); + *out = (inl >> 21); ++in; - *out |= ((*in) % (1U << 20)) << (31 - 20); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 20)) << (31 - 20); out++; - *out = ((*in) >> 20); + *out = (inl >> 20); ++in; - *out |= ((*in) % (1U << 19)) << (31 - 19); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 19)) << (31 - 19); out++; - *out = ((*in) >> 19); + *out = (inl >> 19); ++in; - *out |= ((*in) % (1U << 18)) << (31 - 18); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 18)) << (31 - 18); out++; - *out = ((*in) >> 18); + *out = (inl >> 18); ++in; - *out |= ((*in) % (1U << 17)) << (31 - 17); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 17)) << (31 - 17); out++; - *out = ((*in) >> 17); + *out = (inl >> 17); ++in; - *out |= ((*in) % (1U << 16)) << (31 - 16); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 16)) << (31 - 16); out++; - *out = ((*in) >> 16); + *out = (inl >> 16); ++in; - *out |= ((*in) % (1U << 15)) << (31 - 15); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 15)) << (31 - 15); out++; - *out = ((*in) >> 15); + *out = (inl >> 15); ++in; - *out |= ((*in) % (1U << 14)) << (31 - 14); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 14)) << (31 - 14); out++; - *out = ((*in) >> 14); + *out = (inl >> 14); ++in; - *out |= ((*in) 
% (1U << 13)) << (31 - 13); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 13)) << (31 - 13); out++; - *out = ((*in) >> 13); + *out = (inl >> 13); ++in; - *out |= ((*in) % (1U << 12)) << (31 - 12); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 12)) << (31 - 12); out++; - *out = ((*in) >> 12); + *out = (inl >> 12); ++in; - *out |= ((*in) % (1U << 11)) << (31 - 11); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 11)) << (31 - 11); out++; - *out = ((*in) >> 11); + *out = (inl >> 11); ++in; - *out |= ((*in) % (1U << 10)) << (31 - 10); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 10)) << (31 - 10); out++; - *out = ((*in) >> 10); + *out = (inl >> 10); ++in; - *out |= ((*in) % (1U << 9)) << (31 - 9); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 9)) << (31 - 9); out++; - *out = ((*in) >> 9); + *out = (inl >> 9); ++in; - *out |= ((*in) % (1U << 8)) << (31 - 8); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 8)) << (31 - 8); out++; - *out = ((*in) >> 8); + *out = (inl >> 8); ++in; - *out |= ((*in) % (1U << 7)) << (31 - 7); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 7)) << (31 - 7); out++; - *out = ((*in) >> 7); + *out = (inl >> 7); ++in; - *out |= ((*in) % (1U << 6)) << (31 - 6); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 6)) << (31 - 6); out++; - *out = ((*in) >> 6); + *out = (inl >> 6); ++in; - *out |= ((*in) % (1U << 5)) << (31 - 5); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 5)) << (31 - 5); out++; - *out = ((*in) >> 5); + *out = (inl >> 5); ++in; - *out |= ((*in) % (1U << 4)) << (31 - 4); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 4)) << (31 - 4); out++; - *out = ((*in) >> 4); + *out = (inl >> 4); ++in; - *out |= ((*in) % (1U << 3)) << (31 - 3); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 3)) << (31 - 3); out++; - *out = ((*in) >> 3); + *out = (inl >> 3); ++in; - *out |= ((*in) % (1U << 2)) << (31 - 2); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 2)) << (31 - 2); out++; - *out = ((*in) >> 2); + *out = (inl >> 2); ++in; - *out |= ((*in) % (1U << 1)) << (31 - 1); + inl = util::SafeLoad(in); + *out |= (inl % (1U << 1)) << (31 - 1); out++; - *out = ((*in) >> 1); + *out = (inl >> 1); ++in; out++; @@ -3084,100 +3581,132 @@ inline const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) { } inline const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) { - *out = ((*in) >> 0); + uint32_t inl = util::SafeLoad(in); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out 
= (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; + inl = util::SafeLoad(in); out++; - *out = ((*in) >> 0); + *out = (inl >> 0); ++in; out++; diff --git a/cpp/src/arrow/util/compression-benchmark.cc b/cpp/src/arrow/util/compression-benchmark.cc index 28bc1255c4b..5700b0dd0e2 100644 --- a/cpp/src/arrow/util/compression-benchmark.cc +++ b/cpp/src/arrow/util/compression-benchmark.cc @@ -25,6 +25,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/compression.h" +#include "arrow/util/logging.h" namespace arrow { namespace util { diff --git a/cpp/src/arrow/util/decimal-benchmark.cc b/cpp/src/arrow/util/decimal-benchmark.cc index 41cb86127d1..620593cf7d8 100644 --- a/cpp/src/arrow/util/decimal-benchmark.cc +++ b/cpp/src/arrow/util/decimal-benchmark.cc @@ -34,7 +34,7 @@ static void FromString(benchmark::State& state) { // NOLINT non-const reference "123456789.123456789", "1231234567890.451234567890"}; - while (state.KeepRunning()) { + for (auto _ : state) { for (const auto& value : values) { Decimal128 dec; int32_t scale, precision; @@ -44,57 +44,127 @@ static void FromString(benchmark::State& state) { // NOLINT non-const reference state.SetItemsProcessed(state.iterations() * values.size()); } +constexpr int32_t kValueSize = 10; + static void BinaryCompareOp(benchmark::State& state) { // NOLINT non-const reference - BasicDecimal128 d1(546, 123), d2(123, 456); - while (state.KeepRunning()) { - benchmark::DoNotOptimize(d1 == d2); - benchmark::DoNotOptimize(d1 <= d2); - benchmark::DoNotOptimize(d1 >= d2); - benchmark::DoNotOptimize(d1 >= d1); + std::vector v1, v2; + for (int x = 0; x < kValueSize; x++) { + v1.emplace_back(100 + x, 100 + x); + v2.emplace_back(200 + x, 200 + x); + } + for (auto _ : state) { + for (int x = 0; x < kValueSize; x += 4) { + benchmark::DoNotOptimize(v1[x] == v2[x]); + benchmark::DoNotOptimize(v1[x + 1] <= v2[x + 1]); + benchmark::DoNotOptimize(v1[x + 2] >= v2[x + 2]); + benchmark::DoNotOptimize(v1[x + 3] >= v1[x + 3]); + } + } + state.SetItemsProcessed(state.iterations() * kValueSize); +} + +static void BinaryCompareOpConstant( + benchmark::State& state) { // NOLINT 
non-const reference + std::vector<BasicDecimal128> v1; + for (int x = 0; x < kValueSize; x++) { + v1.emplace_back(100 + x, 100 + x); + } + BasicDecimal128 constant(313, 212); + for (auto _ : state) { + for (int x = 0; x < kValueSize; x += 4) { + benchmark::DoNotOptimize(v1[x] == constant); + benchmark::DoNotOptimize(v1[x + 1] <= constant); + benchmark::DoNotOptimize(v1[x + 2] >= constant); + benchmark::DoNotOptimize(v1[x + 3] != constant); + } + } + state.SetItemsProcessed(state.iterations() * kValueSize); +} + +static void BinaryMathOpAggregate( + benchmark::State& state) { // NOLINT non-const reference + std::vector<BasicDecimal128> v; + for (int x = 0; x < kValueSize; x++) { + v.emplace_back(100 + x, 100 + x); + } + + for (auto _ : state) { + BasicDecimal128 result; + for (int x = 0; x < kValueSize; x++) { + result += v[x]; + } + benchmark::DoNotOptimize(result); } + state.SetItemsProcessed(state.iterations() * kValueSize); } static void BinaryMathOp(benchmark::State& state) { // NOLINT non-const reference - BasicDecimal128 d1(546, 123), d2(123, 456), d3(0, 10); - while (state.KeepRunning()) { - benchmark::DoNotOptimize(d1 - d2); - benchmark::DoNotOptimize(d1 + d2); - benchmark::DoNotOptimize(d1 * d2); - benchmark::DoNotOptimize(d1 / d2); - benchmark::DoNotOptimize(d1 % d3); + std::vector<BasicDecimal128> v1, v2; + for (int x = 0; x < kValueSize; x++) { + v1.emplace_back(100 + x, 100 + x); + v2.emplace_back(200 + x, 200 + x); + } + + for (auto _ : state) { + for (int x = 0; x < kValueSize; x += 5) { + benchmark::DoNotOptimize(v1[x] - v2[x]); + benchmark::DoNotOptimize(v1[x + 1] + v2[x + 1]); + benchmark::DoNotOptimize(v1[x + 2] * v2[x + 2]); + benchmark::DoNotOptimize(v1[x + 3] / v2[x + 3]); + benchmark::DoNotOptimize(v1[x + 4] % v2[x + 4]); + } } + state.SetItemsProcessed(state.iterations() * kValueSize); } static void UnaryOp(benchmark::State& state) { // NOLINT non-const reference - BasicDecimal128 d1(-546, 123), d2(-123, 456); - while (state.KeepRunning()) { - benchmark::DoNotOptimize(d1.Abs()); - benchmark::DoNotOptimize(d2.Negate()); + std::vector<BasicDecimal128> v; + for (int x = 0; x < kValueSize; x++) { + v.emplace_back(100 + x, 100 + x); } + + for (auto _ : state) { + for (int x = 0; x < kValueSize; x += 2) { + benchmark::DoNotOptimize(v[x].Abs()); + benchmark::DoNotOptimize(v[x + 1].Negate()); + } + } + state.SetItemsProcessed(state.iterations() * kValueSize); } static void Constants(benchmark::State& state) { // NOLINT non-const reference BasicDecimal128 d1(-546, 123), d2(-123, 456); - while (state.KeepRunning()) { + for (auto _ : state) { benchmark::DoNotOptimize(BasicDecimal128::GetMaxValue() - d1); benchmark::DoNotOptimize(BasicDecimal128::GetScaleMultiplier(3) + d2); } + state.SetItemsProcessed(state.iterations() * 2); } static void BinaryBitOp(benchmark::State& state) { // NOLINT non-const reference - BasicDecimal128 d1(546, 123), d2(123, 456); - while (state.KeepRunning()) { - benchmark::DoNotOptimize(d1 |= d2); - benchmark::DoNotOptimize(d1 &= d2); + std::vector<BasicDecimal128> v1, v2; + for (int x = 0; x < kValueSize; x++) { + v1.emplace_back(100 + x, 100 + x); + v2.emplace_back(200 + x, 200 + x); + } + + for (auto _ : state) { + for (int x = 0; x < kValueSize; x += 2) { + benchmark::DoNotOptimize(v1[x] |= v2[x]); + benchmark::DoNotOptimize(v1[x + 1] &= v2[x + 1]); + } } + state.SetItemsProcessed(state.iterations() * kValueSize); } BENCHMARK(FromString); BENCHMARK(BinaryMathOp); +BENCHMARK(BinaryMathOpAggregate); BENCHMARK(BinaryCompareOp); -BENCHMARK(BinaryBitOp); +BENCHMARK(BinaryCompareOpConstant); BENCHMARK(UnaryOp); BENCHMARK(Constants);
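// The rewritten benchmarks above iterate with the range-for `for (auto _ : state)`
// idiom instead of the deprecated state.KeepRunning(), operate on small vectors of
// values (so the operation cannot be hoisted out of the timing loop), and report
// throughput via state.SetItemsProcessed().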
+BENCHMARK(BinaryBitOp); } // namespace Decimal } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 46928b20e09..00beffd2450 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -214,7 +214,7 @@ namespace { struct DecimalComponents { util::string_view whole_digits; util::string_view fractional_digits; - int32_t exponent; + int32_t exponent = 0; char sign = 0; bool has_exponent = false; }; diff --git a/cpp/src/arrow/util/hash-util.h b/cpp/src/arrow/util/hash-util.h index 7aed3c171dc..4d33786c095 100644 --- a/cpp/src/arrow/util/hash-util.h +++ b/cpp/src/arrow/util/hash-util.h @@ -71,6 +71,86 @@ class HashUtil { static constexpr bool have_hardware_crc32 = false; #endif +#ifdef ARROW_HAVE_ARMV8_CRYPTO +/* Crc32c Parallel computation + * Algorithm comes from Intel whitepaper: + * crc-iscsi-polynomial-crc32-instruction-paper + * + * Input data is divided into three equal-sized blocks + * Three parallel blocks (crc0, crc1, crc2) for 1024 Bytes + * One Block: 42(BLK_LENGTH) * 8(step length: crc32c_u64) bytes + */ +#define BLK_LENGTH 42 + static uint32_t Armv8CrcHashParallel(const void* data, int32_t nbytes, uint32_t crc) { + const uint8_t* buf8; + const uint64_t* buf64 = reinterpret_cast<const uint64_t*>(data); + int32_t length = nbytes; + + while (length >= 1024) { + uint64_t t0, t1; + uint32_t crc0 = 0, crc1 = 0, crc2 = 0; + + /* parallel computation params: + * k0 = CRC32(x ^ (42 * 8 * 8 * 2 - 1)); + * k1 = CRC32(x ^ (42 * 8 * 8 - 1)); + */ + uint32_t k0 = 0xe417f38a, k1 = 0x8f158014; + + /* First 8 bytes for better pipelining */ + crc0 = ARMCE_crc32_u64(crc, *buf64++); + + /* 3 blocks crc32c parallel computation + * + * 42 * 8 * 3 = 1008 (bytes) + */ + for (int i = 0; i < BLK_LENGTH; i++, buf64++) { + crc0 = ARMCE_crc32_u64(crc0, *buf64); + crc1 = ARMCE_crc32_u64(crc1, *(buf64 + BLK_LENGTH)); + crc2 = ARMCE_crc32_u64(crc2, *(buf64 + (BLK_LENGTH * 2))); + } + buf64 += (BLK_LENGTH * 2); + + /* Last 8 bytes */ + crc = ARMCE_crc32_u64(crc2, *buf64++); + + t0 = (uint64_t)vmull_p64(crc0, k0); + t1 = (uint64_t)vmull_p64(crc1, k1); + + /* Merge (crc0, crc1, crc2) -> crc */ + crc1 = ARMCE_crc32_u64(0, t1); + crc ^= crc1; + crc0 = ARMCE_crc32_u64(0, t0); + crc ^= crc0; + + length -= 1024; + } + + buf8 = reinterpret_cast<const uint8_t*>(buf64); + while (length >= 8) { + crc = ARMCE_crc32_u64(crc, *reinterpret_cast<const uint64_t*>(buf8)); + buf8 += 8; + length -= 8; + } + + /* The following is more efficient than the straight loop */ + if (length >= 4) { + crc = ARMCE_crc32_u32(crc, *reinterpret_cast<const uint32_t*>(buf8)); + buf8 += 4; + length -= 4; + } + + if (length >= 2) { + crc = ARMCE_crc32_u16(crc, *reinterpret_cast<const uint16_t*>(buf8)); + buf8 += 2; + length -= 2; + } + + if (length >= 1) crc = ARMCE_crc32_u8(crc, *(buf8)); + + return crc; + } +#endif + /// Compute the Crc32 hash for data using SSE4/ArmCRC instructions. The input hash /// parameter is the current hash/seed value. /// This should only be called if SSE/ArmCRC is supported.
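Each 1024-byte chunk consumed above decomposes as 8 head bytes, 3 * 42 * 8 = 1008 bytes striped across crc0/crc1/crc2, and 8 tail bytes; the k0/k1 constants fold crc0 and crc1 forward by 2 * 42 * 64 - 1 and 42 * 64 - 1 bits so the three partial CRCs can be merged with XOR. A minimal call-site sketch (hypothetical buffer and seed, assuming an AArch64 build where ARROW_HAVE_ARMV8_CRYPTO is defined):

    std::vector<uint8_t> buf(4096, 0xAB);  // illustrative data only
    uint32_t h = HashUtil::Armv8CrcHashParallel(
        buf.data(), static_cast<int32_t>(buf.size()), /*crc=*/0);
    // Inputs shorter than 1024 bytes skip the striped loop entirely and are
    // consumed by the 8/4/2/1-byte tail handling.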
@@ -295,8 +375,14 @@ inline int HashUtil::Hash(const void* data, int32_t bytes, uint32_t seed) return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed)); else #endif - // Double CRC - return static_cast<int>(HashUtil::DoubleCrcHash(data, bytes, seed)); + +#ifdef ARROW_HAVE_ARMV8_CRYPTO + // Arm64 parallel crc32 + return static_cast<int>(HashUtil::Armv8CrcHashParallel(data, bytes, seed)); +#else + // Double CRC + return static_cast<int>(HashUtil::DoubleCrcHash(data, bytes, seed)); +#endif } // Murmur Hash diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h index 49641d81c08..bad2b49905e 100644 --- a/cpp/src/arrow/util/hashing.h +++ b/cpp/src/arrow/util/hashing.h @@ -149,9 +149,8 @@ hash_t ComputeStringHash(const void* data, int64_t length) { // the results uint32_t x, y; hash_t hx, hy; - // XXX those are unaligned accesses. Should we have a facility for that? - x = *reinterpret_cast<const uint32_t*>(p + n - 4); - y = *reinterpret_cast<const uint32_t*>(p); + x = util::SafeLoadAs<uint32_t>(p + n - 4); + y = util::SafeLoadAs<uint32_t>(p); hx = ScalarHelper<uint32_t, AlgNum>::ComputeHash(x); hy = ScalarHelper<uint32_t, AlgNum>::ComputeHash(y); return n ^ hx ^ hy; @@ -160,16 +159,20 @@ hash_t ComputeStringHash(const void* data, int64_t length) { // Apply the same principle as above uint64_t x, y; hash_t hx, hy; - x = *reinterpret_cast<const uint64_t*>(p + n - 8); - y = *reinterpret_cast<const uint64_t*>(p); + x = util::SafeLoadAs<uint64_t>(p + n - 8); + y = util::SafeLoadAs<uint64_t>(p); hx = ScalarHelper<uint64_t, AlgNum>::ComputeHash(x); hy = ScalarHelper<uint64_t, AlgNum>::ComputeHash(y); return n ^ hx ^ hy; } if (HashUtil::have_hardware_crc32) { +#ifdef ARROW_HAVE_ARMV8_CRYPTO + auto h = HashUtil::Armv8CrcHashParallel(data, static_cast<int32_t>(length), AlgNum); +#else // DoubleCrcHash is faster than Murmur2. auto h = HashUtil::DoubleCrcHash(data, static_cast<int32_t>(length), AlgNum); +#endif return ScalarHelper<uint64_t, AlgNum>::ComputeHash(h); } else { // Fall back on 64-bit Murmur2 for longer strings.
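The util::SafeLoadAs / util::SafeLoad helpers these call sites switch to are defined in arrow/util/ubsan.h later in this diff: a std::memcpy into a local, which is well-defined for unaligned pointers (the old reinterpret_cast loads were exactly the unaligned accesses UBSan flags) and which optimizing compilers lower to a single load anyway. A standalone sketch of the idiom, not the Arrow code itself:

    #include <cstdint>
    #include <cstring>

    // memcpy-based unaligned load; T must be trivially copyable.
    template <typename T>
    T LoadUnaligned(const uint8_t* p) {
      T value;
      std::memcpy(&value, p, sizeof(T));  // defined for any alignment
      return value;                       // compiles to a plain load on x86/ARM
    }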
@@ -721,6 +724,7 @@ class BinaryMemoTable : public MemoTable { #ifndef NDEBUG int64_t data_length = values_.size() - static_cast<size_t>(left_offset); assert(data_length + width_size == out_size); + ARROW_UNUSED(data_length); #endif auto in_data = values_.data() + left_offset; diff --git a/cpp/src/arrow/util/io-util.cc b/cpp/src/arrow/util/io-util.cc index c092769d81f..58072b61381 100644 --- a/cpp/src/arrow/util/io-util.cc +++ b/cpp/src/arrow/util/io-util.cc @@ -564,8 +564,9 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, return StatusFromErrno("Cannot get file handle: "); } - LONG new_size_low = static_cast<LONG>(new_size & 0xFFFFFFFFL); - LONG new_size_high = static_cast<LONG>((new_size >> 32) & 0xFFFFFFFFL); + uint64_t new_size64 = new_size; + LONG new_size_low = static_cast<LONG>(new_size64 & 0xFFFFFFFFUL); + LONG new_size_high = static_cast<LONG>((new_size64 >> 32) & 0xFFFFFFFFUL); SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN); SetEndOfFile(h); @@ -892,7 +893,7 @@ Status TemporaryDir::Make(const std::string& prefix, std::unique_ptr -#endif -#endif + +#ifdef __ARM_FEATURE_CRYPTO +#include <arm_neon.h> +#define ARROW_HAVE_ARMV8_CRYPTO +#endif // __ARM_FEATURE_CRYPTO + +#endif // __ARM_FEATURE_CRC32 + +#endif // defined(__aarch64__) || defined(__AARCH64__) #if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC) diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h index 48898140bf1..f1b1e182c14 100644 --- a/cpp/src/arrow/util/stl.h +++ b/cpp/src/arrow/util/stl.h @@ -59,14 +59,14 @@ inline std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t i template <typename T> inline std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index, - const T& new_element) { + T new_element) { DCHECK_LE(index, values.size()); std::vector<T> out; out.reserve(values.size() + 1); for (size_t i = 0; i < index; ++i) { out.push_back(values[i]); } - out.push_back(new_element); + out.emplace_back(std::move(new_element)); for (size_t i = index; i < values.size(); ++i) { out.push_back(values[i]); } @@ -75,14 +75,14 @@ inline std::vector<T> AddVectorElement(const std::vector<T>& values, size_t inde template <typename T> inline std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index, - const T& new_element) { + T new_element) { DCHECK_LE(index, values.size()); std::vector<T> out; out.reserve(values.size()); for (size_t i = 0; i < index; ++i) { out.push_back(values[i]); } - out.push_back(new_element); + out.emplace_back(std::move(new_element)); for (size_t i = index + 1; i < values.size(); ++i) { out.push_back(values[i]); } diff --git a/cpp/src/arrow/util/ubsan.h b/cpp/src/arrow/util/ubsan.h index f9fcfb54022..fe5322a3e83 100644 --- a/cpp/src/arrow/util/ubsan.h +++ b/cpp/src/arrow/util/ubsan.h @@ -19,6 +19,7 @@ #pragma once +#include <cstring> #include <type_traits> #include "arrow/util/macros.h" @@ -49,5 +50,21 @@ inline T* MakeNonNull(T* maybe_null) { return reinterpret_cast<T*>(&internal::non_null_filler); } +template <typename T> +inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoadAs( + const uint8_t* unaligned) { + typename std::remove_const<T>::type ret; + std::memcpy(&ret, unaligned, sizeof(T)); + return ret; +} + +template <typename T> +inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoad( + const T* unaligned) { + typename std::remove_const<T>::type ret; + std::memcpy(&ret, unaligned, sizeof(T)); + return ret; +} + } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/visitor.cc b/cpp/src/arrow/visitor.cc index 53b341b53d7..2ec6c6421d0 100644 --- a/cpp/src/arrow/visitor.cc +++ b/cpp/src/arrow/visitor.cc @@ -47,6
+47,8 @@ ARRAY_VISITOR_DEFAULT(FloatArray) ARRAY_VISITOR_DEFAULT(DoubleArray) ARRAY_VISITOR_DEFAULT(BinaryArray) ARRAY_VISITOR_DEFAULT(StringArray) +ARRAY_VISITOR_DEFAULT(LargeBinaryArray) +ARRAY_VISITOR_DEFAULT(LargeStringArray) ARRAY_VISITOR_DEFAULT(FixedSizeBinaryArray) ARRAY_VISITOR_DEFAULT(Date32Array) ARRAY_VISITOR_DEFAULT(Date64Array) @@ -90,6 +92,8 @@ TYPE_VISITOR_DEFAULT(FloatType) TYPE_VISITOR_DEFAULT(DoubleType) TYPE_VISITOR_DEFAULT(StringType) TYPE_VISITOR_DEFAULT(BinaryType) +TYPE_VISITOR_DEFAULT(LargeStringType) +TYPE_VISITOR_DEFAULT(LargeBinaryType) TYPE_VISITOR_DEFAULT(FixedSizeBinaryType) TYPE_VISITOR_DEFAULT(Date64Type) TYPE_VISITOR_DEFAULT(Date32Type) @@ -134,6 +138,8 @@ SCALAR_VISITOR_DEFAULT(FloatScalar) SCALAR_VISITOR_DEFAULT(DoubleScalar) SCALAR_VISITOR_DEFAULT(StringScalar) SCALAR_VISITOR_DEFAULT(BinaryScalar) +SCALAR_VISITOR_DEFAULT(LargeStringScalar) +SCALAR_VISITOR_DEFAULT(LargeBinaryScalar) SCALAR_VISITOR_DEFAULT(FixedSizeBinaryScalar) SCALAR_VISITOR_DEFAULT(Date64Scalar) SCALAR_VISITOR_DEFAULT(Date32Scalar) diff --git a/cpp/src/arrow/visitor.h b/cpp/src/arrow/visitor.h index a4979e9cef8..1c854c47804 100644 --- a/cpp/src/arrow/visitor.h +++ b/cpp/src/arrow/visitor.h @@ -43,6 +43,8 @@ class ARROW_EXPORT ArrayVisitor { virtual Status Visit(const DoubleArray& array); virtual Status Visit(const StringArray& array); virtual Status Visit(const BinaryArray& array); + virtual Status Visit(const LargeStringArray& array); + virtual Status Visit(const LargeBinaryArray& array); virtual Status Visit(const FixedSizeBinaryArray& array); virtual Status Visit(const Date32Array& array); virtual Status Visit(const Date64Array& array); @@ -81,6 +83,8 @@ class ARROW_EXPORT TypeVisitor { virtual Status Visit(const DoubleType& type); virtual Status Visit(const StringType& type); virtual Status Visit(const BinaryType& type); + virtual Status Visit(const LargeStringType& type); + virtual Status Visit(const LargeBinaryType& type); virtual Status Visit(const FixedSizeBinaryType& type); virtual Status Visit(const Date64Type& type); virtual Status Visit(const Date32Type& type); @@ -119,6 +123,8 @@ class ARROW_EXPORT ScalarVisitor { virtual Status Visit(const DoubleScalar& scalar); virtual Status Visit(const StringScalar& scalar); virtual Status Visit(const BinaryScalar& scalar); + virtual Status Visit(const LargeStringScalar& scalar); + virtual Status Visit(const LargeBinaryScalar& scalar); virtual Status Visit(const FixedSizeBinaryScalar& scalar); virtual Status Visit(const Date64Scalar& scalar); virtual Status Visit(const Date32Scalar& scalar); diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h index 544763a2f74..3ed058e6492 100644 --- a/cpp/src/arrow/visitor_inline.h +++ b/cpp/src/arrow/visitor_inline.h @@ -47,6 +47,8 @@ namespace arrow { ACTION(Double); \ ACTION(String); \ ACTION(Binary); \ + ACTION(LargeString); \ + ACTION(LargeBinary); \ ACTION(FixedSizeBinary); \ ACTION(Duration); \ ACTION(Date32); \ @@ -186,12 +188,13 @@ struct ArrayDataVisitor> { }; template <typename T> -struct ArrayDataVisitor<T, enable_if_binary<T>> { +struct ArrayDataVisitor<T, enable_if_base_binary<T>> { template <typename Visitor> static Status Visit(const ArrayData& arr, Visitor* visitor) { + using offset_type = typename T::offset_type; constexpr uint8_t empty_value = 0; - const int32_t* offsets = arr.GetValues<int32_t>(1); + const offset_type* offsets = arr.GetValues<offset_type>(1); const uint8_t* data; if (!arr.buffers[2]) { data = &empty_value; diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 468ecc7e41e..df0c54e9c44 100644 ---
a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -95,6 +95,15 @@ endif() # set(GANDIVA_SHARED_LINK_FLAGS "${GANDIVA_SHARED_LINK_FLAGS} /EXPORT:${SYMBOL}") # endforeach() # endif() +if(NOT APPLE AND NOT MSVC) + # Localize thirdparty symbols using a linker version script. This hides them + # from the client application. The OS X linker does not support the + # version-script option. + set(GANDIVA_VERSION_SCRIPT_FLAGS + "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map") + set(GANDIVA_SHARED_LINK_FLAGS + "${GANDIVA_SHARED_LINK_FLAGS} ${GANDIVA_VERSION_SCRIPT_FLAGS}") +endif() add_arrow_lib(gandiva SOURCES diff --git a/cpp/src/gandiva/annotator.cc b/cpp/src/gandiva/annotator.cc index 754d70e0e04..0eab915d351 100644 --- a/cpp/src/gandiva/annotator.cc +++ b/cpp/src/gandiva/annotator.cc @@ -31,30 +31,35 @@ FieldDescriptorPtr Annotator::CheckAndAddInputFieldDescriptor(FieldPtr field) { return found->second; } - auto desc = MakeDesc(field); + auto desc = MakeDesc(field, false /*is_output*/); in_name_to_desc_[field->name()] = desc; return desc; } FieldDescriptorPtr Annotator::AddOutputFieldDescriptor(FieldPtr field) { - auto desc = MakeDesc(field); + auto desc = MakeDesc(field, true /*is_output*/); out_descs_.push_back(desc); return desc; } -FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field) { +FieldDescriptorPtr Annotator::MakeDesc(FieldPtr field, bool is_output) { int data_idx = buffer_count_++; int validity_idx = buffer_count_++; int offsets_idx = FieldDescriptor::kInvalidIdx; if (arrow::is_binary_like(field->type()->id())) { offsets_idx = buffer_count_++; } - return std::make_shared<FieldDescriptor>(field, data_idx, validity_idx, offsets_idx); + int data_buffer_ptr_idx = FieldDescriptor::kInvalidIdx; + if (is_output) { + data_buffer_ptr_idx = buffer_count_++; + } + return std::make_shared<FieldDescriptor>(field, data_idx, validity_idx, offsets_idx, + data_buffer_ptr_idx); } void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, const arrow::ArrayData& array_data, - EvalBatch* eval_batch) { + EvalBatch* eval_batch, bool is_output) { int buffer_idx = 0; // The validity buffer is optional. Use nullptr if it does not have one. @@ -74,7 +79,12 @@ void Annotator::PrepareBuffersForField(const FieldDescriptor& desc, uint8_t* data_buf = const_cast<uint8_t*>(array_data.buffers[buffer_idx]->data()); eval_batch->SetBuffer(desc.data_idx(), data_buf); - ++buffer_idx; + if (is_output) { + // pass in the Buffer object for output data buffers. Can be used for resizing. + uint8_t* data_buf_ptr = + reinterpret_cast<uint8_t*>(array_data.buffers[buffer_idx].get()); + eval_batch->SetBuffer(desc.data_buffer_ptr_idx(), data_buf_ptr); + } } EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch, @@ -92,14 +102,14 @@ EvalBatchPtr Annotator::PrepareEvalBatch(const arrow::RecordBatch& record_batch, } PrepareBuffersForField(*(found->second), *(record_batch.column(i))->data(), - eval_batch.get(), false /*is_output*/); } // Fill in the entries for the output fields.
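// The output loop below passes is_output=true, so each output field also gets a
// data_buffer_ptr_idx slot holding the arrow::Buffer* itself, not just its data
// pointer; generated code can use that handle to resize variable-length output
// buffers (see MakeDesc and PrepareBuffersForField above).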
int idx = 0; for (auto& arraydata : out_vector) { const FieldDescriptorPtr& desc = out_descs_.at(idx); - PrepareBuffersForField(*desc, *arraydata, eval_batch.get()); + PrepareBuffersForField(*desc, *arraydata, eval_batch.get(), true /*is_output*/); ++idx; } return eval_batch; diff --git a/cpp/src/gandiva/annotator.h b/cpp/src/gandiva/annotator.h index c0ddc024635..dcf665c04a5 100644 --- a/cpp/src/gandiva/annotator.h +++ b/cpp/src/gandiva/annotator.h @@ -54,12 +54,13 @@ class GANDIVA_EXPORT Annotator { private: /// Annotate a field and return the descriptor. - FieldDescriptorPtr MakeDesc(FieldPtr field); + FieldDescriptorPtr MakeDesc(FieldPtr field, bool is_output); /// Populate eval_batch by extracting the raw buffers from the arrow array, whose /// contents are represent by the annotated descriptor 'desc'. void PrepareBuffersForField(const FieldDescriptor& desc, - const arrow::ArrayData& array_data, EvalBatch* eval_batch); + const arrow::ArrayData& array_data, EvalBatch* eval_batch, + bool is_output); /// The list of input/output buffers (includes bitmap buffers, value buffers and /// offset buffers). diff --git a/cpp/src/gandiva/annotator_test.cc b/cpp/src/gandiva/annotator_test.cc index dabf4e65990..cd829f75c51 100644 --- a/cpp/src/gandiva/annotator_test.cc +++ b/cpp/src/gandiva/annotator_test.cc @@ -73,6 +73,7 @@ TEST_F(TestAnnotator, TestAdd) { EXPECT_EQ(desc_sum->field(), field_sum); EXPECT_EQ(desc_sum->data_idx(), 4); EXPECT_EQ(desc_sum->validity_idx(), 5); + EXPECT_EQ(desc_sum->data_buffer_ptr_idx(), 6); // prepare record batch int num_records = 100; @@ -85,7 +86,7 @@ TEST_F(TestAnnotator, TestAdd) { auto arrow_sum = MakeInt32Array(num_records); EvalBatchPtr batch = annotator.PrepareEvalBatch(*record_batch, {arrow_sum->data()}); - EXPECT_EQ(batch->GetNumBuffers(), 6); + EXPECT_EQ(batch->GetNumBuffers(), 7); auto buffers = batch->GetBufferArray(); EXPECT_EQ(buffers[desc_a->validity_idx()], arrow_v0->data()->buffers.at(0)->data()); @@ -94,6 +95,8 @@ TEST_F(TestAnnotator, TestAdd) { EXPECT_EQ(buffers[desc_b->data_idx()], arrow_v1->data()->buffers.at(1)->data()); EXPECT_EQ(buffers[desc_sum->validity_idx()], arrow_sum->data()->buffers.at(0)->data()); EXPECT_EQ(buffers[desc_sum->data_idx()], arrow_sum->data()->buffers.at(1)->data()); + EXPECT_EQ(buffers[desc_sum->data_buffer_ptr_idx()], + reinterpret_cast<uint8_t*>(arrow_sum->data()->buffers.at(1).get())); auto bitmaps = batch->GetLocalBitMapArray(); EXPECT_EQ(bitmaps, nullptr); diff --git a/cpp/src/gandiva/decimal_ir.cc b/cpp/src/gandiva/decimal_ir.cc index 6e4bb56bdfa..d9ea4e0da5c 100644 --- a/cpp/src/gandiva/decimal_ir.cc +++ b/cpp/src/gandiva/decimal_ir.cc @@ -16,6 +16,7 @@ // under the License. #include <string> +#include <unordered_set> #include <vector> #include "arrow/status.h" @@ -35,6 +36,21 @@ namespace gandiva { AddTrace128(msg, value); \ } +// These are the functions defined in this file. The rest are in the precompiled +// folder, and the i128 values need to be disassembled for those.
+static const char* kAddFunction = "add_decimal128_decimal128"; +static const char* kSubtractFunction = "subtract_decimal128_decimal128"; +static const char* kEQFunction = "equal_decimal128_decimal128"; +static const char* kNEFunction = "not_equal_decimal128_decimal128"; +static const char* kLTFunction = "less_than_decimal128_decimal128"; +static const char* kLEFunction = "less_than_or_equal_to_decimal128_decimal128"; +static const char* kGTFunction = "greater_than_decimal128_decimal128"; +static const char* kGEFunction = "greater_than_or_equal_to_decimal128_decimal128"; + +static const std::unordered_set<std::string> kDecimalIRBuilderFunctions{ + kAddFunction, kSubtractFunction, kEQFunction, kNEFunction, + kLTFunction, kLEFunction, kGTFunction, kGEFunction}; + const char* DecimalIR::kScaleMultipliersName = "gandivaScaleMultipliers"; /// Populate globals required by decimal IR. @@ -250,7 +266,7 @@ Status DecimalIR::BuildAdd() { // int32_t out_precision, int32_t out_scale) auto i32 = types()->i32_type(); auto i128 = types()->i128_type(); - auto function = BuildFunction("add_decimal128_decimal128", i128, + auto function = BuildFunction(kAddFunction, i128, { {"x_value", i128}, {"x_precision", i32}, @@ -316,7 +332,7 @@ Status DecimalIR::BuildSubtract() { // int32_t out_precision, int32_t out_scale) auto i32 = types()->i32_type(); auto i128 = types()->i128_type(); - auto function = BuildFunction("subtract_decimal128_decimal128", i128, + auto function = BuildFunction(kSubtractFunction, i128, { {"x_value", i128}, {"x_precision", i32}, @@ -345,121 +361,13 @@ Status DecimalIR::BuildSubtract() { } ++i; } - auto value = - ir_builder()->CreateCall(module()->getFunction("add_decimal128_decimal128"), args); + auto value = ir_builder()->CreateCall(module()->getFunction(kAddFunction), args); // store result to out ir_builder()->CreateRet(value); return Status::OK(); } -Status DecimalIR::BuildMultiply() { - // Create fn prototype : - // int128_t - // multiply_decimal128_decimal128(int128_t x_value, int32_t x_precision, int32_t - // x_scale, - // int128_t y_value, int32_t y_precision, int32_t y_scale - // int32_t out_precision, int32_t out_scale) - auto i32 = types()->i32_type(); - auto i128 = types()->i128_type(); - auto function = BuildFunction("multiply_decimal128_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"y_value", i128}, - {"y_precision", i32}, - {"y_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - }); - - auto arg_iter = function->arg_begin(); - ValueFull x(&arg_iter[0], &arg_iter[1], &arg_iter[2]); - ValueFull y(&arg_iter[3], &arg_iter[4], &arg_iter[5]); - ValueFull out(nullptr, &arg_iter[6], &arg_iter[7]); - - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - // Make call to pre-compiled IR function.
- auto block = ir_builder()->GetInsertBlock(); - auto out_high_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_hi", block); - auto out_low_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_low", block); - auto x_split = ValueSplit::MakeFromInt128(this, x.value()); - auto y_split = ValueSplit::MakeFromInt128(this, y.value()); - - std::vector args = { - x_split.high(), x_split.low(), x.precision(), x.scale(), - y_split.high(), y_split.low(), y.precision(), y.scale(), - out.precision(), out.scale(), out_high_ptr, out_low_ptr, - }; - ir_builder()->CreateCall( - module()->getFunction("multiply_internal_decimal128_decimal128"), args); - - auto out_high = ir_builder()->CreateLoad(out_high_ptr); - auto out_low = ir_builder()->CreateLoad(out_low_ptr); - auto result = ValueSplit(out_high, out_low).AsInt128(this); - ADD_TRACE_128("Multiply : result", result); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status DecimalIR::BuildDivideOrMod(const std::string& function_name, - const std::string& internal_fname) { - // Create fn prototype : - // int128_t - // divide_decimal128_decimal128(int64_t execution_context, - // int128_t x_value, int32_t x_precision, int32_t x_scale, - // int128_t y_value, int32_t y_precision, int32_t y_scale - // int32_t out_precision, int32_t out_scale) - auto i32 = types()->i32_type(); - auto i128 = types()->i128_type(); - auto function = BuildFunction(function_name, i128, - { - {"execution_context", types()->i64_type()}, - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"y_value", i128}, - {"y_precision", i32}, - {"y_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - }); - - auto arg_iter = function->arg_begin(); - auto execution_context = &arg_iter[0]; - ValueFull x(&arg_iter[1], &arg_iter[2], &arg_iter[3]); - ValueFull y(&arg_iter[4], &arg_iter[5], &arg_iter[6]); - ValueFull out(nullptr, &arg_iter[7], &arg_iter[8]); - - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - // Make call to pre-compiled IR function. 
- auto block = ir_builder()->GetInsertBlock(); - auto out_high_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_hi", block); - auto out_low_ptr = new llvm::AllocaInst(types()->i64_type(), 0, "out_low", block); - auto x_split = ValueSplit::MakeFromInt128(this, x.value()); - auto y_split = ValueSplit::MakeFromInt128(this, y.value()); - - std::vector<llvm::Value*> args = { - execution_context, x_split.high(), x_split.low(), x.precision(), x.scale(), - y_split.high(), y_split.low(), y.precision(), y.scale(), out.precision(), - out.scale(), out_high_ptr, out_low_ptr, - }; - ir_builder()->CreateCall(module()->getFunction(internal_fname), args); - - auto out_high = ir_builder()->CreateLoad(out_high_ptr); - auto out_low = ir_builder()->CreateLoad(out_low_ptr); - auto result = ValueSplit(out_high, out_low).AsInt128(this); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - Status DecimalIR::BuildCompare(const std::string& function_name, llvm::ICmpInst::Predicate cmp_instruction) { // Create fn prototype : @@ -495,50 +403,47 @@ Status DecimalIR::BuildCompare(const std::string& function_name, y_split.high(), y_split.low(), y.precision(), y.scale(), }; auto cmp_value = ir_builder()->CreateCall( - module()->getFunction("compare_internal_decimal128_decimal128"), args); + module()->getFunction("compare_decimal128_decimal128_internal"), args); auto result = ir_builder()->CreateICmp(cmp_instruction, cmp_value, types()->i32_constant(0)); ir_builder()->CreateRet(result); return Status::OK(); } -Status DecimalIR::BuildDecimalFunction(const std::string& function_name, - llvm::Type* return_type, - std::vector in_types) { - auto i64 = types()->i64_type(); - auto i128 = types()->i128_type(); - auto function = BuildFunction(function_name, return_type, in_types); - - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); +llvm::Value* DecimalIR::CallDecimalFunction(const std::string& function_name, + llvm::Type* return_type, + const std::vector<llvm::Value*>& params) { + if (kDecimalIRBuilderFunctions.count(function_name) != 0) { + // this fn is built with the IRBuilder. + return ir_builder()->CreateCall(module()->getFunction(function_name), params); + } - std::vector<llvm::Value*> args; - int arg_idx = 0; - auto arg_iter = function->arg_begin(); - for (auto& type : in_types) { - if (type.type == i128) { + // pre-compiled fn: disassemble i128 to two i64s and re-assemble. + auto i128 = types()->i128_type(); + auto i64 = types()->i64_type(); + std::vector<llvm::Value*> dis_assembled_args; + for (auto& arg : params) { + if (arg->getType() == i128) { // split i128 arg into two int64s. - auto split = ValueSplit::MakeFromInt128(this, &arg_iter[arg_idx]); - args.push_back(split.high()); - args.push_back(split.low()); + auto split = ValueSplit::MakeFromInt128(this, arg); + dis_assembled_args.push_back(split.high()); + dis_assembled_args.push_back(split.low()); } else { - args.push_back(&arg_iter[arg_idx]); + dis_assembled_args.push_back(arg); } - ++arg_idx; } - auto internal_name = function_name + "_internal"; llvm::Value* result = nullptr; if (return_type == i128) { // for i128 ret, replace with two int64* args, and join them. auto block = ir_builder()->GetInsertBlock(); auto out_high_ptr = new llvm::AllocaInst(i64, 0, "out_hi", block); auto out_low_ptr = new llvm::AllocaInst(i64, 0, "out_low", block); - args.push_back(out_high_ptr); - args.push_back(out_low_ptr); + dis_assembled_args.push_back(out_high_ptr); + dis_assembled_args.push_back(out_low_ptr); // Make call to pre-compiled IR function.
- ir_builder()->CreateCall(module()->getFunction(internal_name), args); + ir_builder()->CreateCall(module()->getFunction(function_name), dis_assembled_args); auto out_high = ir_builder()->CreateLoad(out_high_ptr); auto out_low = ir_builder()->CreateLoad(out_low_ptr); @@ -547,19 +452,14 @@ Status DecimalIR::BuildDecimalFunction(const std::string& function_name, DCHECK_NE(return_type, types()->void_type()); // Make call to pre-compiled IR function. - result = ir_builder()->CreateCall(module()->getFunction(internal_name), args); + result = ir_builder()->CreateCall(module()->getFunction(function_name), + dis_assembled_args); } - ir_builder()->CreateRet(result); - return Status::OK(); + return result; } Status DecimalIR::AddFunctions(Engine* engine) { auto decimal_ir = std::make_shared(engine); - auto i128 = decimal_ir->types()->i128_type(); - auto i32 = decimal_ir->types()->i32_type(); - auto i1 = decimal_ir->types()->i1_type(); - auto i64 = decimal_ir->types()->i64_type(); - auto f64 = decimal_ir->types()->double_type(); // Populate global variables used by decimal operations. decimal_ir->AddGlobals(engine); @@ -569,259 +469,12 @@ Status DecimalIR::AddFunctions(Engine* engine) { ARROW_RETURN_NOT_OK(decimal_ir->BuildAdd()); ARROW_RETURN_NOT_OK(decimal_ir->BuildSubtract()); - ARROW_RETURN_NOT_OK(decimal_ir->BuildMultiply()); - ARROW_RETURN_NOT_OK(decimal_ir->BuildDivideOrMod( - "divide_decimal128_decimal128", "divide_internal_decimal128_decimal128")); - ARROW_RETURN_NOT_OK(decimal_ir->BuildDivideOrMod("mod_decimal128_decimal128", - "mod_internal_decimal128_decimal128")); - - ARROW_RETURN_NOT_OK( - decimal_ir->BuildCompare("equal_decimal128_decimal128", llvm::ICmpInst::ICMP_EQ)); - ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare("not_equal_decimal128_decimal128", - llvm::ICmpInst::ICMP_NE)); - ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare("less_than_decimal128_decimal128", - llvm::ICmpInst::ICMP_SLT)); - ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare( - "less_than_or_equal_to_decimal128_decimal128", llvm::ICmpInst::ICMP_SLE)); - ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare("greater_than_decimal128_decimal128", - llvm::ICmpInst::ICMP_SGT)); - ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare( - "greater_than_or_equal_to_decimal128_decimal128", llvm::ICmpInst::ICMP_SGE)); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("abs_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("ceil_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("floor_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("round_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("round_decimal128_int32", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"round_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("truncate_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"out_precision", i32}, - {"out_scale", 
i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("truncate_decimal128_int32", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"round_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("castDECIMAL_int64", i128, - { - {"value", i64}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("castDECIMAL_float64", i128, - { - {"value", f64}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("castDECIMAL_decimal128", i128, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"out_precision", i32}, - {"out_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("castBIGINT_decimal128", i64, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("castFLOAT8_decimal128", f64, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash_decimal128", i32, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32_decimal128", i32, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64_decimal128", i64, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32WithSeed_decimal128", i32, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - {"seed", i32}, - {"seed_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64WithSeed_decimal128", i64, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - {"seed", i64}, - {"seed_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash32AsDouble_decimal128", i32, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("hash64AsDouble_decimal128", i64, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK( - decimal_ir->BuildDecimalFunction("hash32AsDoubleWithSeed_decimal128", i32, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - {"seed", i32}, - {"seed_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK( - decimal_ir->BuildDecimalFunction("hash64AsDoubleWithSeed_decimal128", i64, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - {"seed", i64}, - {"seed_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnull_decimal128", i1, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnotnull_decimal128", i1, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK(decimal_ir->BuildDecimalFunction("isnumeric_decimal128", i1, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK( - 
decimal_ir->BuildDecimalFunction("is_distinct_from_decimal128_decimal128", i1, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - {"y_value", i128}, - {"y_precision", i32}, - {"y_scale", i32}, - {"y_isvalid", i1}, - })); - - ARROW_RETURN_NOT_OK( - decimal_ir->BuildDecimalFunction("is_not_distinct_from_decimal128_decimal128", i1, - { - {"x_value", i128}, - {"x_precision", i32}, - {"x_scale", i32}, - {"x_isvalid", i1}, - {"y_value", i128}, - {"y_precision", i32}, - {"y_scale", i32}, - {"y_isvalid", i1}, - })); - + ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kEQFunction, llvm::ICmpInst::ICMP_EQ)); + ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kNEFunction, llvm::ICmpInst::ICMP_NE)); + ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kLTFunction, llvm::ICmpInst::ICMP_SLT)); + ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kLEFunction, llvm::ICmpInst::ICMP_SLE)); + ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kGTFunction, llvm::ICmpInst::ICMP_SGT)); + ARROW_RETURN_NOT_OK(decimal_ir->BuildCompare(kGEFunction, llvm::ICmpInst::ICMP_SGE)); return Status::OK(); } diff --git a/cpp/src/gandiva/decimal_ir.h b/cpp/src/gandiva/decimal_ir.h index c2a22b3f1c0..6503c825e3e 100644 --- a/cpp/src/gandiva/decimal_ir.h +++ b/cpp/src/gandiva/decimal_ir.h @@ -37,6 +37,10 @@ class DecimalIR : public FunctionIRBuilder { void EnableTraces() { enable_ir_traces_ = true; } + llvm::Value* CallDecimalFunction(const std::string& function_name, + llvm::Type* return_type, + const std::vector& args); + private: /// The intrinsic fn for divide with small divisors is about 10x slower, so not /// using these. diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc index 923841c0bf9..bce43d53c8c 100644 --- a/cpp/src/gandiva/expr_validator.cc +++ b/cpp/src/gandiva/expr_validator.cc @@ -89,6 +89,12 @@ Status ExprValidator::Visit(const IfNode& node) { auto then_node_ret_type = node.then_node()->return_type(); auto else_node_ret_type = node.else_node()->return_type(); + // condition must be of boolean type. + ARROW_RETURN_IF( + !node.condition()->return_type()->Equals(arrow::boolean()), + Status::ExpressionValidationError("condition must be of boolean type, found type ", + node.condition()->return_type()->ToString())); + // Then-branch return type must match. 
   // Then-branch return type must match.
   ARROW_RETURN_IF(!if_node_ret_type->Equals(*then_node_ret_type),
                   Status::ExpressionValidationError(
diff --git a/cpp/src/gandiva/expression_registry.cc b/cpp/src/gandiva/expression_registry.cc
index d0629635530..b884bf6eb30 100644
--- a/cpp/src/gandiva/expression_registry.cc
+++ b/cpp/src/gandiva/expression_registry.cc
@@ -30,28 +30,51 @@ ExpressionRegistry::ExpressionRegistry() {
 
 ExpressionRegistry::~ExpressionRegistry() {}
 
+// to be used only to create function_signature_begin
+ExpressionRegistry::FunctionSignatureIterator::FunctionSignatureIterator(
+    native_func_iterator_type nf_it, native_func_iterator_type nf_it_end)
+    : native_func_it_{nf_it},
+      native_func_it_end_{nf_it_end},
+      func_sig_it_{&(nf_it->signatures().front())} {}
+
+// to be used only to create function_signature_end
+ExpressionRegistry::FunctionSignatureIterator::FunctionSignatureIterator(
+    func_sig_iterator_type fs_it)
+    : native_func_it_{nullptr}, native_func_it_end_{nullptr}, func_sig_it_{fs_it} {}
+
 const ExpressionRegistry::FunctionSignatureIterator
 ExpressionRegistry::function_signature_begin() {
-  return FunctionSignatureIterator(function_registry_->begin());
+  return FunctionSignatureIterator(function_registry_->begin(),
+                                   function_registry_->end());
 }
 
 const ExpressionRegistry::FunctionSignatureIterator
 ExpressionRegistry::function_signature_end() const {
-  return FunctionSignatureIterator(function_registry_->end());
+  return FunctionSignatureIterator(&(*(function_registry_->back()->signatures().end())));
 }
 
 bool ExpressionRegistry::FunctionSignatureIterator::operator!=(
     const FunctionSignatureIterator& func_sign_it) {
-  return func_sign_it.it_ != this->it_;
+  return func_sign_it.func_sig_it_ != this->func_sig_it_;
 }
 
 FunctionSignature ExpressionRegistry::FunctionSignatureIterator::operator*() {
-  return (*it_).signature();
+  return *func_sig_it_;
 }
 
-ExpressionRegistry::iterator ExpressionRegistry::FunctionSignatureIterator::operator++(
-    int increment) {
-  return it_++;
+ExpressionRegistry::func_sig_iterator_type ExpressionRegistry::FunctionSignatureIterator::
+operator++(int increment) {
+  ++func_sig_it_;
+  // point func_sig_it_ to the first signature of the next NativeFunction if
+  // func_sig_it_ is pointing to the end
+  if (func_sig_it_ == &(*native_func_it_->signatures().end())) {
+    ++native_func_it_;
+    if (native_func_it_ == native_func_it_end_) {  // last native function
+      return func_sig_it_;
+    }
+    func_sig_it_ = &(native_func_it_->signatures().front());
+  }
+  return func_sig_it_;
 }
 
 DataTypeVector ExpressionRegistry::supported_types_ =
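The iterator is now two-level: the outer cursor walks NativeFunction entries and the inner cursor walks each entry's signatures, so one pass visits every (name, argument types) combination, aliases included. Usage stays the same as before; a small self-contained sketch:

```cpp
#include <iostream>

#include "gandiva/expression_registry.h"

int main() {
  gandiva::ExpressionRegistry registry;
  // Walks every signature of every NativeFunction, including aliases,
  // via the flattened iterator defined above.
  for (auto it = registry.function_signature_begin();
       it != registry.function_signature_end(); it++) {
    std::cout << (*it).ToString() << std::endl;
  }
  return 0;
}
```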
diff --git a/cpp/src/gandiva/expression_registry.h b/cpp/src/gandiva/expression_registry.h
index 97197f29004..bcebbe674f3 100644
--- a/cpp/src/gandiva/expression_registry.h
+++ b/cpp/src/gandiva/expression_registry.h
@@ -36,22 +36,27 @@ class FunctionRegistry;
 /// data types and functions supported by Gandiva.
 class GANDIVA_EXPORT ExpressionRegistry {
  public:
-  using iterator = const NativeFunction*;
+  using native_func_iterator_type = const NativeFunction*;
+  using func_sig_iterator_type = const FunctionSignature*;
   ExpressionRegistry();
   ~ExpressionRegistry();
   static DataTypeVector supported_types() { return supported_types_; }
   class GANDIVA_EXPORT FunctionSignatureIterator {
    public:
-    explicit FunctionSignatureIterator(iterator it) : it_(it) {}
+    explicit FunctionSignatureIterator(native_func_iterator_type nf_it,
+                                       native_func_iterator_type nf_it_end);
+    explicit FunctionSignatureIterator(func_sig_iterator_type fs_it);
 
     bool operator!=(const FunctionSignatureIterator& func_sign_it);
 
     FunctionSignature operator*();
 
-    iterator operator++(int);
+    func_sig_iterator_type operator++(int);
 
    private:
-    iterator it_;
+    native_func_iterator_type native_func_it_;
+    const native_func_iterator_type native_func_it_end_;
+    func_sig_iterator_type func_sig_it_;
   };
   const FunctionSignatureIterator function_signature_begin();
   const FunctionSignatureIterator function_signature_end() const;
diff --git a/cpp/src/gandiva/expression_registry_test.cc b/cpp/src/gandiva/expression_registry_test.cc
index c50e136d69d..c254ff4f3aa 100644
--- a/cpp/src/gandiva/expression_registry_test.cc
+++ b/cpp/src/gandiva/expression_registry_test.cc
@@ -43,10 +43,11 @@ TEST_F(TestExpressionRegistry, VerifySupportedFunctions) {
     functions.push_back((*iter));
   }
   for (auto& iter : registry_) {
-    auto function = iter.signature();
-    auto element = std::find(functions.begin(), functions.end(), function);
-    EXPECT_NE(element, functions.end())
-        << "function " << iter.pc_name() << " missing in supported functions.\n";
+    for (auto& func_iter : iter.signatures()) {
+      auto element = std::find(functions.begin(), functions.end(), func_iter);
+      EXPECT_NE(element, functions.end()) << "function signature " << func_iter.ToString()
+                                          << " missing in supported functions.\n";
+    }
   }
 }
diff --git a/cpp/src/gandiva/field_descriptor.h b/cpp/src/gandiva/field_descriptor.h
index 70583b0405b..d931f378ff2 100644
--- a/cpp/src/gandiva/field_descriptor.h
+++ b/cpp/src/gandiva/field_descriptor.h
@@ -31,11 +31,12 @@ class FieldDescriptor {
   static const int kInvalidIdx = -1;
 
   FieldDescriptor(FieldPtr field, int data_idx, int validity_idx = kInvalidIdx,
-                  int offsets_idx = kInvalidIdx)
+                  int offsets_idx = kInvalidIdx, int data_buffer_ptr_idx = kInvalidIdx)
       : field_(field),
         data_idx_(data_idx),
         validity_idx_(validity_idx),
-        offsets_idx_(offsets_idx) {}
+        offsets_idx_(offsets_idx),
+        data_buffer_ptr_idx_(data_buffer_ptr_idx) {}
 
   /// Index of validity array in the array-of-buffers
   int validity_idx() const { return validity_idx_; }
@@ -46,6 +47,9 @@ class FieldDescriptor {
   /// Index of offsets array in the array-of-buffers
   int offsets_idx() const { return offsets_idx_; }
 
+  /// Index of data buffer pointer in the array-of-buffers
+  int data_buffer_ptr_idx() const { return data_buffer_ptr_idx_; }
+
   FieldPtr field() const { return field_; }
 
   const std::string& Name() const { return field_->name(); }
 
   bool HasOffsetsIdx() const { return offsets_idx_ != kInvalidIdx; }
 
+  bool HasDataBufferPtrIdx() const { return data_buffer_ptr_idx_ != kInvalidIdx; }
+
  private:
   FieldPtr field_;
   int data_idx_;
   int validity_idx_;
   int offsets_idx_;
+  int data_buffer_ptr_idx_;
 };
 
 }  // namespace gandiva
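The extra index exists because a variable-length output vector must carry a pointer to its data buffer, not just the buffer contents, so the buffer can be re-allocated while evaluation is running. A hypothetical descriptor for a utf8 output (the slot numbers here are illustrative, not mandated by the header):

```cpp
#include "gandiva/field_descriptor.h"

// Hypothetical slot assignment for a utf8 output vector: validity, offsets,
// data, and the new data-buffer pointer occupy four consecutive entries in
// the flattened args array handed to the generated function.
gandiva::FieldDescriptor out_desc(arrow::field("s", arrow::utf8()),
                                  /*data_idx=*/2, /*validity_idx=*/0,
                                  /*offsets_idx=*/1, /*data_buffer_ptr_idx=*/3);
// out_desc.HasDataBufferPtrIdx() is now true; fixed-width outputs keep
// kInvalidIdx here and are laid out exactly as before.
```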
diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc
index 43eda4dee77..d5d015c10b4 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -37,6 +37,10 @@ FunctionRegistry::iterator FunctionRegistry::end() const {
   return &(*pc_registry_.end());
 }
 
+FunctionRegistry::iterator FunctionRegistry::back() const {
+  return &(pc_registry_.back());
+}
+
 std::vector<NativeFunction> FunctionRegistry::pc_registry_;
 
 SignatureMap FunctionRegistry::pc_registry_map_ = InitPCMap();
 
@@ -62,7 +66,9 @@ SignatureMap FunctionRegistry::InitPCMap() {
   pc_registry_.insert(std::end(pc_registry_), v6.begin(), v6.end());
 
   for (auto& elem : pc_registry_) {
-    map.insert(std::make_pair(&(elem.signature()), &elem));
+    for (auto& func_signature : elem.signatures()) {
+      map.insert(std::make_pair(&(func_signature), &elem));
+    }
   }
 
   return map;
diff --git a/cpp/src/gandiva/function_registry.h b/cpp/src/gandiva/function_registry.h
index f7aa3de4bb5..dadc8318a77 100644
--- a/cpp/src/gandiva/function_registry.h
+++ b/cpp/src/gandiva/function_registry.h
@@ -36,6 +36,7 @@ class GANDIVA_EXPORT FunctionRegistry {
 
   iterator begin() const;
   iterator end() const;
+  iterator back() const;
 
  private:
   static SignatureMap InitPCMap();
diff --git a/cpp/src/gandiva/function_registry_arithmetic.cc b/cpp/src/gandiva/function_registry_arithmetic.cc
index b6c58198baf..74e528acecb 100644
--- a/cpp/src/gandiva/function_registry_arithmetic.cc
+++ b/cpp/src/gandiva/function_registry_arithmetic.cc
@@ -20,23 +20,24 @@
 
 namespace gandiva {
 
-#define BINARY_SYMMETRIC_FN(name) NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, name)
+#define BINARY_SYMMETRIC_FN(name, ALIASES) \
+  NUMERIC_TYPES(BINARY_SYMMETRIC_SAFE_NULL_IF_NULL, name, ALIASES)
 
-#define BINARY_RELATIONAL_BOOL_FN(name) \
-  NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name)
+#define BINARY_RELATIONAL_BOOL_FN(name, ALIASES) \
+  NUMERIC_BOOL_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name, ALIASES)
 
-#define BINARY_RELATIONAL_BOOL_DATE_FN(name) \
-  NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name)
+#define BINARY_RELATIONAL_BOOL_DATE_FN(name, ALIASES) \
+  NUMERIC_DATE_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name, ALIASES)
 
-#define UNARY_CAST_TO_FLOAT64(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT8, name, float64)
+#define UNARY_CAST_TO_FLOAT64(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT8, {}, name, float64)
 
-#define UNARY_CAST_TO_FLOAT32(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT4, name, float32)
+#define UNARY_CAST_TO_FLOAT32(name) UNARY_SAFE_NULL_IF_NULL(castFLOAT4, {}, name, float32)
 
 std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
   static std::vector<NativeFunction> arithmetic_fn_registry_ = {
-      UNARY_SAFE_NULL_IF_NULL(not, boolean, boolean),
-      UNARY_SAFE_NULL_IF_NULL(castBIGINT, int32, int64),
-      UNARY_SAFE_NULL_IF_NULL(castBIGINT, decimal128, int64),
+      UNARY_SAFE_NULL_IF_NULL(not, {}, boolean, boolean),
+      UNARY_SAFE_NULL_IF_NULL(castBIGINT, {}, int32, int64),
+      UNARY_SAFE_NULL_IF_NULL(castBIGINT, {}, decimal128, int64),
       // cast to float32
       UNARY_CAST_TO_FLOAT32(int32),
       UNARY_CAST_TO_FLOAT32(int64),
@@ -46,36 +47,45 @@ std::vector<NativeFunction> GetArithmeticFunctionRegistry() {
       UNARY_CAST_TO_FLOAT64(float32),
       UNARY_CAST_TO_FLOAT64(decimal128),
       // cast to decimal
-      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, int64, decimal128),
-      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, float64, decimal128),
-      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, decimal128, decimal128),
+      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, {}, int32, decimal128),
+      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, {}, int64, decimal128),
+      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, {}, float32, decimal128),
+      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, {}, float64, decimal128),
+      UNARY_SAFE_NULL_IF_NULL(castDECIMAL, {}, decimal128, decimal128),
+      UNARY_UNSAFE_NULL_IF_NULL(castDECIMAL, {}, utf8, decimal128),
 
-      UNARY_SAFE_NULL_IF_NULL(castDATE, int64, date64),
+      UNARY_SAFE_NULL_IF_NULL(castDATE, {}, int64, date64),
 
       // add/sub/multiply/divide/mod
-      BINARY_SYMMETRIC_FN(add), BINARY_SYMMETRIC_FN(subtract),
-      BINARY_SYMMETRIC_FN(multiply),
-      NUMERIC_TYPES(BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL, divide),
-      BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int32, int32),
-      BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, int64, int64, int64),
-      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(add, decimal128),
-      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(subtract, decimal128),
-      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(multiply, decimal128),
-      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(divide, decimal128),
-      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(mod, decimal128),
+      BINARY_SYMMETRIC_FN(add, {}), BINARY_SYMMETRIC_FN(subtract, {}),
+      BINARY_SYMMETRIC_FN(multiply, {}),
+      NUMERIC_TYPES(BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL, divide, {}),
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, {"modulo"}, int64, int32, int32),
+      BINARY_GENERIC_SAFE_NULL_IF_NULL(mod, {"modulo"}, int64, int64, int64),
+      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(add, {}, decimal128),
+      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(subtract, {}, decimal128),
+      BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(multiply, {}, decimal128),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(divide, {}, decimal128),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(mod, {"modulo"}, decimal128),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(mod, {"modulo"}, float64),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(div, {}, int32),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(div, {}, int64),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(div, {}, float32),
+      BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(div, {}, float64),
 
       // compare functions
-      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(equal, decimal128),
-      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(not_equal, decimal128),
-      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(less_than, decimal128),
-      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(less_than_or_equal_to, decimal128),
-      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(greater_than, decimal128),
-      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(greater_than_or_equal_to, decimal128),
-      BINARY_RELATIONAL_BOOL_FN(equal), BINARY_RELATIONAL_BOOL_FN(not_equal),
-      BINARY_RELATIONAL_BOOL_DATE_FN(less_than),
-      BINARY_RELATIONAL_BOOL_DATE_FN(less_than_or_equal_to),
-      BINARY_RELATIONAL_BOOL_DATE_FN(greater_than),
-      BINARY_RELATIONAL_BOOL_DATE_FN(greater_than_or_equal_to)};
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(equal, {}, decimal128),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(not_equal, {}, decimal128),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(less_than, {}, decimal128),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(less_than_or_equal_to, {}, decimal128),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(greater_than, {}, decimal128),
+      BINARY_RELATIONAL_SAFE_NULL_IF_NULL(greater_than_or_equal_to, {}, decimal128),
+      BINARY_RELATIONAL_BOOL_FN(equal, ({"eq", "same"})),
+      BINARY_RELATIONAL_BOOL_FN(not_equal, {}),
+      BINARY_RELATIONAL_BOOL_DATE_FN(less_than, {}),
+      BINARY_RELATIONAL_BOOL_DATE_FN(less_than_or_equal_to, {}),
+      BINARY_RELATIONAL_BOOL_DATE_FN(greater_than, {}),
+      BINARY_RELATIONAL_BOOL_DATE_FN(greater_than_or_equal_to, {})};
 
   return arithmetic_fn_registry_;
 }
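Every registration macro now takes an ALIASES argument: a brace-list of alternate SQL names that resolve to the same pre-compiled implementation. When the list itself contains a comma, as in {"eq", "same"} above, it has to be wrapped in an extra set of parentheses so the preprocessor does not split it into separate macro arguments. Expanded by hand, a registration looks roughly like this (constructor shape inferred from the macros in function_registry_common.h below; enum and type names assumed to be at gandiva scope):

```cpp
#include <string>
#include <vector>

#include "gandiva/native_function.h"

// mod(int64, int32) and its alias modulo(int64, int32) now share one registry
// entry and one pre-compiled function, "mod_int64_int32".
gandiva::NativeFunction mod_fn(
    "mod", std::vector<std::string>{"modulo"},
    gandiva::DataTypeVector{arrow::int64(), arrow::int32()}, arrow::int32(),
    gandiva::kResultNullIfNull, "mod_int64_int32");
```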
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
index f6a3d14c56c..b6119f84e63 100644
--- a/cpp/src/gandiva/function_registry_common.h
+++ b/cpp/src/gandiva/function_registry_common.h
@@ -19,6 +19,7 @@
 #define GANDIVA_FUNCTION_REGISTRY_COMMON_H
 
 #include
+#include
 #include
 #include
 
@@ -74,8 +75,9 @@ typedef std::unordered_map
 
-#define BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(NAME, TYPE) \
-  NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, TYPE(), kResultNullIfNull, \
+#define BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE) \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES,       \
+                 DataTypeVector{TYPE(), TYPE()}, TYPE(), kResultNullIfNull, \
                  ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE))
 
 // Binary functions that :
@@ -84,22 +86,24 @@ typedef std::unordered_map
-#define BINARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \
-  NativeFunction(#NAME, DataTypeVector{IN_TYPE(), IN_TYPE()}, OUT_TYPE(), \
-                 kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE##_##IN_TYPE), \
+#define BINARY_UNSAFE_NULL_IF_NULL(NAME, ALIASES, IN_TYPE, OUT_TYPE) \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES,            \
+                 DataTypeVector{IN_TYPE(), IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \
+                 ARROW_STRINGIFY(NAME##_##IN_TYPE##_##IN_TYPE), \
                  NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors)
 
-#define BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(NAME, TYPE) \
-  BINARY_UNSAFE_NULL_IF_NULL(NAME, TYPE, TYPE)
+#define BINARY_SYMMETRIC_UNSAFE_NULL_IF_NULL(NAME, ALIASES, TYPE) \
+  BINARY_UNSAFE_NULL_IF_NULL(NAME, ALIASES, TYPE, TYPE)
 
 // Binary functions that :
 // - have different input types, or output type
 // - NULL handling is of type NULL_IF_NULL
 //
 // The pre-compiled fn name includes the base name & input type names. eg. mod_int64_int32
-#define BINARY_GENERIC_SAFE_NULL_IF_NULL(NAME, IN_TYPE1, IN_TYPE2, OUT_TYPE) \
-  NativeFunction(#NAME, DataTypeVector{IN_TYPE1(), IN_TYPE2()}, OUT_TYPE(), \
-                 kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE1##_##IN_TYPE2))
+#define BINARY_GENERIC_SAFE_NULL_IF_NULL(NAME, ALIASES, IN_TYPE1, IN_TYPE2, OUT_TYPE) \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES,                             \
+                 DataTypeVector{IN_TYPE1(), IN_TYPE2()}, OUT_TYPE(), kResultNullIfNull, \
+                 ARROW_STRINGIFY(NAME##_##IN_TYPE1##_##IN_TYPE2))
 
 // Binary functions that :
 // - have the same input type
@@ -108,33 +112,34 @@ typedef std::unordered_map
-#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL(NAME, TYPE) \
-  NativeFunction(#NAME, DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullIfNull, \
+#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE) \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES,        \
+                 DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullIfNull, \
                  ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE))
 
 // Unary functions that :
 // - NULL handling is of type NULL_IF_NULL
 //
 // The pre-compiled fn name includes the base name & input type name. eg. castFloat_int32
-#define UNARY_SAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \
-  NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \
-                 ARROW_STRINGIFY(NAME##_##IN_TYPE))
+#define UNARY_SAFE_NULL_IF_NULL(NAME, ALIASES, IN_TYPE, OUT_TYPE) \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{IN_TYPE()}, \
+                 OUT_TYPE(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE))
 
 // Unary functions that :
 // - NULL handling is of type NULL_NEVER
 //
 // The pre-compiled fn name includes the base name & input type name. eg. isnull_int32
-#define UNARY_SAFE_NULL_NEVER_BOOL(NAME, TYPE) \
-  NativeFunction(#NAME, DataTypeVector{TYPE()}, boolean(), kResultNullNever, \
-                 ARROW_STRINGIFY(NAME##_##TYPE))
+#define UNARY_SAFE_NULL_NEVER_BOOL(NAME, ALIASES, TYPE) \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
+                 boolean(), kResultNullNever, ARROW_STRINGIFY(NAME##_##TYPE))
 
 // Unary functions that :
 // - NULL handling is of type NULL_INTERNAL
 //
 // The pre-compiled fn name includes the base name & input type name. eg. 
castFloat_int32 -#define UNARY_UNSAFE_NULL_IF_NULL(NAME, IN_TYPE, OUT_TYPE) \ - NativeFunction(#NAME, DataTypeVector{IN_TYPE()}, OUT_TYPE(), kResultNullIfNull, \ - ARROW_STRINGIFY(NAME##_##IN_TYPE), \ +#define UNARY_UNSAFE_NULL_IF_NULL(NAME, ALIASES, IN_TYPE, OUT_TYPE) \ + NativeFunction(#NAME, std::vector ALIASES, DataTypeVector{IN_TYPE()}, \ + OUT_TYPE(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##IN_TYPE), \ NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors) // Binary functions that : @@ -142,76 +147,84 @@ typedef std::unordered_map ALIASES, \ + DataTypeVector{TYPE(), TYPE()}, boolean(), kResultNullNever, \ ARROW_STRINGIFY(NAME##_##TYPE##_##TYPE)) // Extract functions (used with data/time types) that : // - NULL handling is of type NULL_IF_NULL // // The pre-compiled fn name includes the base name & input type name. eg. extractYear_date -#define EXTRACT_SAFE_NULL_IF_NULL(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullIfNull, \ - ARROW_STRINGIFY(NAME##_##TYPE)) +#define EXTRACT_SAFE_NULL_IF_NULL(NAME, ALIASES, TYPE) \ + NativeFunction(#NAME, std::vector ALIASES, DataTypeVector{TYPE()}, \ + int64(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE)) // Hash32 functions that : // - NULL handling is of type NULL_NEVER // // The pre-compiled fn name includes the base name & input type name. hash32_int8 -#define HASH32_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int32(), kResultNullNever, \ - ARROW_STRINGIFY(NAME##_##TYPE)) +#define HASH32_SAFE_NULL_NEVER(NAME, ALIASES, TYPE) \ + NativeFunction(#NAME, std::vector ALIASES, DataTypeVector{TYPE()}, \ + int32(), kResultNullNever, ARROW_STRINGIFY(NAME##_##TYPE)) // Hash32 functions that : // - NULL handling is of type NULL_NEVER // // The pre-compiled fn name includes the base name & input type name. hash32_int8 -#define HASH64_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE()}, int64(), kResultNullNever, \ - ARROW_STRINGIFY(NAME##_##TYPE)) +#define HASH64_SAFE_NULL_NEVER(NAME, ALIASES, TYPE) \ + NativeFunction(#NAME, std::vector ALIASES, DataTypeVector{TYPE()}, \ + int64(), kResultNullNever, ARROW_STRINGIFY(NAME##_##TYPE)) // Hash32 functions with seed that : // - NULL handling is of type NULL_NEVER // // The pre-compiled fn name includes the base name & input type name. hash32WithSeed_int8 -#define HASH32_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), int32()}, int32(), kResultNullNever, \ +#define HASH32_SEED_SAFE_NULL_NEVER(NAME, ALIASES, TYPE) \ + NativeFunction(#NAME, std::vector ALIASES, \ + DataTypeVector{TYPE(), int32()}, int32(), kResultNullNever, \ ARROW_STRINGIFY(NAME##WithSeed_##TYPE)) // Hash64 functions with seed that : // - NULL handling is of type NULL_NEVER // // The pre-compiled fn name includes the base name & input type name. 
hash32WithSeed_int8 -#define HASH64_SEED_SAFE_NULL_NEVER(NAME, TYPE) \ - NativeFunction(#NAME, DataTypeVector{TYPE(), int64()}, int64(), kResultNullNever, \ +#define HASH64_SEED_SAFE_NULL_NEVER(NAME, ALIASES, TYPE) \ + NativeFunction(#NAME, std::vector ALIASES, \ + DataTypeVector{TYPE(), int64()}, int64(), kResultNullNever, \ ARROW_STRINGIFY(NAME##WithSeed_##TYPE)) // Iterate the inner macro over all numeric types -#define NUMERIC_TYPES(INNER, NAME) \ - INNER(NAME, int8), INNER(NAME, int16), INNER(NAME, int32), INNER(NAME, int64), \ - INNER(NAME, uint8), INNER(NAME, uint16), INNER(NAME, uint32), INNER(NAME, uint64), \ - INNER(NAME, float32), INNER(NAME, float64), INNER(NAME, decimal128) +#define NUMERIC_TYPES(INNER, NAME, ALIASES) \ + INNER(NAME, ALIASES, int8), INNER(NAME, ALIASES, int16), INNER(NAME, ALIASES, int32), \ + INNER(NAME, ALIASES, int64), INNER(NAME, ALIASES, uint8), \ + INNER(NAME, ALIASES, uint16), INNER(NAME, ALIASES, uint32), \ + INNER(NAME, ALIASES, uint64), INNER(NAME, ALIASES, float32), \ + INNER(NAME, ALIASES, float64), INNER(NAME, ALIASES, decimal128) // Iterate the inner macro over numeric and date/time types -#define NUMERIC_DATE_TYPES(INNER, NAME) \ - NUMERIC_TYPES(INNER, NAME), DATE_TYPES(INNER, NAME), TIME_TYPES(INNER, NAME) +#define NUMERIC_DATE_TYPES(INNER, NAME, ALIASES) \ + NUMERIC_TYPES(INNER, NAME, ALIASES), DATE_TYPES(INNER, NAME, ALIASES), \ + TIME_TYPES(INNER, NAME, ALIASES) // Iterate the inner macro over all date types -#define DATE_TYPES(INNER, NAME) INNER(NAME, date64), INNER(NAME, timestamp) +#define DATE_TYPES(INNER, NAME, ALIASES) \ + INNER(NAME, ALIASES, date64), INNER(NAME, ALIASES, timestamp) // Iterate the inner macro over all time types -#define TIME_TYPES(INNER, NAME) INNER(NAME, time32) +#define TIME_TYPES(INNER, NAME, ALIASES) INNER(NAME, ALIASES, time32) // Iterate the inner macro over all data types -#define VAR_LEN_TYPES(INNER, NAME) INNER(NAME, utf8), INNER(NAME, binary) +#define VAR_LEN_TYPES(INNER, NAME, ALIASES) \ + INNER(NAME, ALIASES, utf8), INNER(NAME, ALIASES, binary) // Iterate the inner macro over all numeric types, date types and bool type -#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME) \ - NUMERIC_DATE_TYPES(INNER, NAME), INNER(NAME, boolean) +#define NUMERIC_BOOL_DATE_TYPES(INNER, NAME, ALIASES) \ + NUMERIC_DATE_TYPES(INNER, NAME, ALIASES), INNER(NAME, ALIASES, boolean) // Iterate the inner macro over all numeric types, date types, bool and varlen types -#define NUMERIC_BOOL_DATE_VAR_LEN_TYPES(INNER, NAME) \ - NUMERIC_BOOL_DATE_TYPES(INNER, NAME), VAR_LEN_TYPES(INNER, NAME) +#define NUMERIC_BOOL_DATE_VAR_LEN_TYPES(INNER, NAME, ALIASES) \ + NUMERIC_BOOL_DATE_TYPES(INNER, NAME, ALIASES), VAR_LEN_TYPES(INNER, NAME, ALIASES) } // namespace gandiva diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc index f36e5678c0d..2f52b74ba25 100644 --- a/cpp/src/gandiva/function_registry_datetime.cc +++ b/cpp/src/gandiva/function_registry_datetime.cc @@ -20,44 +20,44 @@ namespace gandiva { -#define DATE_EXTRACTION_FNS(name) \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Millennium), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Century), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Decade), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Year), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Quarter), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Month), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Week), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Day), \ - 
DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute), \ - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second) +#define DATE_EXTRACTION_FNS(name) \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Millennium, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Century, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Decade, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Year, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Quarter, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Month, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Week, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Day, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {}), \ + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second, {}) -#define TIME_EXTRACTION_FNS(name) \ - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour), \ - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute), \ - TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second) +#define TIME_EXTRACTION_FNS(name) \ + TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Hour, {}), \ + TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Minute, {}), \ + TIME_TYPES(EXTRACT_SAFE_NULL_IF_NULL, name##Second, {}) std::vector GetDateTimeFunctionRegistry() { static std::vector date_time_fn_registry_ = { DATE_EXTRACTION_FNS(extract), DATE_EXTRACTION_FNS(date_trunc_), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow), - DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch), + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDoy, {}), + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractDow, {}), + DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch, {}), TIME_EXTRACTION_FNS(extract), - NativeFunction("castDATE", DataTypeVector{utf8()}, date64(), kResultNullIfNull, + NativeFunction("castDATE", {}, DataTypeVector{utf8()}, date64(), kResultNullIfNull, "castDATE_utf8", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - NativeFunction("castTIMESTAMP", DataTypeVector{utf8()}, timestamp(), + NativeFunction("castTIMESTAMP", {}, DataTypeVector{utf8()}, timestamp(), kResultNullIfNull, "castTIMESTAMP_utf8", NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), - NativeFunction("to_date", DataTypeVector{utf8(), utf8(), int32()}, date64(), + NativeFunction("to_date", {}, DataTypeVector{utf8(), utf8(), int32()}, date64(), kResultNullInternal, "gdv_fn_to_date_utf8_utf8_int32", NativeFunction::kNeedsContext | NativeFunction::kNeedsFunctionHolder | diff --git a/cpp/src/gandiva/function_registry_hash.cc b/cpp/src/gandiva/function_registry_hash.cc index a163a230eac..4184f50ba81 100644 --- a/cpp/src/gandiva/function_registry_hash.cc +++ b/cpp/src/gandiva/function_registry_hash.cc @@ -20,32 +20,32 @@ namespace gandiva { -#define HASH32_SAFE_NULL_NEVER_FN(name) \ - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, name) +#define HASH32_SAFE_NULL_NEVER_FN(name, ALIASES) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SAFE_NULL_NEVER, name, ALIASES) -#define HASH32_SEED_SAFE_NULL_NEVER_FN(name) \ - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, name) +#define HASH32_SEED_SAFE_NULL_NEVER_FN(name, ALIASES) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH32_SEED_SAFE_NULL_NEVER, name, ALIASES) -#define HASH64_SAFE_NULL_NEVER_FN(name) \ - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, name) +#define HASH64_SAFE_NULL_NEVER_FN(name, ALIASES) \ + 
NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SAFE_NULL_NEVER, name, ALIASES) -#define HASH64_SEED_SAFE_NULL_NEVER_FN(name) \ - NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, name) +#define HASH64_SEED_SAFE_NULL_NEVER_FN(name, ALIASES) \ + NUMERIC_BOOL_DATE_VAR_LEN_TYPES(HASH64_SEED_SAFE_NULL_NEVER, name, ALIASES) std::vector GetHashFunctionRegistry() { static std::vector hash_fn_registry_ = { - HASH32_SAFE_NULL_NEVER_FN(hash), - HASH32_SAFE_NULL_NEVER_FN(hash32), - HASH32_SAFE_NULL_NEVER_FN(hash32AsDouble), + HASH32_SAFE_NULL_NEVER_FN(hash, {}), + HASH32_SAFE_NULL_NEVER_FN(hash32, {}), + HASH32_SAFE_NULL_NEVER_FN(hash32AsDouble, {}), - HASH32_SEED_SAFE_NULL_NEVER_FN(hash32), - HASH32_SEED_SAFE_NULL_NEVER_FN(hash32AsDouble), + HASH32_SEED_SAFE_NULL_NEVER_FN(hash32, {}), + HASH32_SEED_SAFE_NULL_NEVER_FN(hash32AsDouble, {}), - HASH64_SAFE_NULL_NEVER_FN(hash64), - HASH64_SAFE_NULL_NEVER_FN(hash64AsDouble), + HASH64_SAFE_NULL_NEVER_FN(hash64, {}), + HASH64_SAFE_NULL_NEVER_FN(hash64AsDouble, {}), - HASH64_SEED_SAFE_NULL_NEVER_FN(hash64), - HASH64_SEED_SAFE_NULL_NEVER_FN(hash64AsDouble)}; + HASH64_SEED_SAFE_NULL_NEVER_FN(hash64, {}), + HASH64_SEED_SAFE_NULL_NEVER_FN(hash64AsDouble, {})}; return hash_fn_registry_; } diff --git a/cpp/src/gandiva/function_registry_math_ops.cc b/cpp/src/gandiva/function_registry_math_ops.cc index 2084b7bd04c..046556b4088 100644 --- a/cpp/src/gandiva/function_registry_math_ops.cc +++ b/cpp/src/gandiva/function_registry_math_ops.cc @@ -20,55 +20,56 @@ namespace gandiva { -#define MATH_UNARY_OPS(name) \ - UNARY_SAFE_NULL_IF_NULL(name, int32, float64), \ - UNARY_SAFE_NULL_IF_NULL(name, int64, float64), \ - UNARY_SAFE_NULL_IF_NULL(name, uint32, float64), \ - UNARY_SAFE_NULL_IF_NULL(name, uint64, float64), \ - UNARY_SAFE_NULL_IF_NULL(name, float32, float64), \ - UNARY_SAFE_NULL_IF_NULL(name, float64, float64) +#define MATH_UNARY_OPS(name, ALIASES) \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, int32, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, int64, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, uint32, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, uint64, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, float32, float64), \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, float64, float64) -#define MATH_BINARY_UNSAFE(name) \ - BINARY_UNSAFE_NULL_IF_NULL(name, int32, float64), \ - BINARY_UNSAFE_NULL_IF_NULL(name, int64, float64), \ - BINARY_UNSAFE_NULL_IF_NULL(name, uint32, float64), \ - BINARY_UNSAFE_NULL_IF_NULL(name, uint64, float64), \ - BINARY_UNSAFE_NULL_IF_NULL(name, float32, float64), \ - BINARY_UNSAFE_NULL_IF_NULL(name, float64, float64) +#define MATH_BINARY_UNSAFE(name, ALIASES) \ + BINARY_UNSAFE_NULL_IF_NULL(name, ALIASES, int32, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, ALIASES, int64, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, ALIASES, uint32, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, ALIASES, uint64, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, ALIASES, float32, float64), \ + BINARY_UNSAFE_NULL_IF_NULL(name, ALIASES, float64, float64) -#define UNARY_SAFE_NULL_NEVER_BOOL_FN(name) \ - NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, name) +#define UNARY_SAFE_NULL_NEVER_BOOL_FN(name, ALIASES) \ + NUMERIC_BOOL_DATE_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, name, ALIASES) -#define BINARY_SAFE_NULL_NEVER_BOOL_FN(name) \ - NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, name) +#define BINARY_SAFE_NULL_NEVER_BOOL_FN(name, ALIASES) \ + NUMERIC_BOOL_DATE_TYPES(BINARY_SAFE_NULL_NEVER_BOOL, name, ALIASES) std::vector 
GetMathOpsFunctionRegistry() { static std::vector math_fn_registry_ = { - MATH_UNARY_OPS(cbrt), - MATH_UNARY_OPS(exp), - MATH_UNARY_OPS(log), - MATH_UNARY_OPS(log10), + MATH_UNARY_OPS(cbrt, {}), + MATH_UNARY_OPS(exp, {}), + MATH_UNARY_OPS(log, {}), + MATH_UNARY_OPS(log10, {}), - MATH_BINARY_UNSAFE(log), + MATH_BINARY_UNSAFE(log, {}), - BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(power, float64), + BINARY_SYMMETRIC_SAFE_NULL_IF_NULL(power, {"pow"}, float64), - UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull), - UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull), + UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull, {}), + UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}), - NUMERIC_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnumeric), + NUMERIC_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, isnumeric, {}), - BINARY_SAFE_NULL_NEVER_BOOL_FN(is_distinct_from), - BINARY_SAFE_NULL_NEVER_BOOL_FN(is_not_distinct_from), + BINARY_SAFE_NULL_NEVER_BOOL_FN(is_distinct_from, {}), + BINARY_SAFE_NULL_NEVER_BOOL_FN(is_not_distinct_from, {}), // decimal functions - UNARY_SAFE_NULL_IF_NULL(abs, decimal128, decimal128), - UNARY_SAFE_NULL_IF_NULL(ceil, decimal128, decimal128), - UNARY_SAFE_NULL_IF_NULL(floor, decimal128, decimal128), - UNARY_SAFE_NULL_IF_NULL(round, decimal128, decimal128), - UNARY_SAFE_NULL_IF_NULL(truncate, decimal128, decimal128), - BINARY_GENERIC_SAFE_NULL_IF_NULL(round, decimal128, int32, decimal128), - BINARY_GENERIC_SAFE_NULL_IF_NULL(truncate, decimal128, int32, decimal128), + UNARY_SAFE_NULL_IF_NULL(abs, {}, decimal128, decimal128), + UNARY_SAFE_NULL_IF_NULL(ceil, {}, decimal128, decimal128), + UNARY_SAFE_NULL_IF_NULL(floor, {}, decimal128, decimal128), + UNARY_SAFE_NULL_IF_NULL(round, {}, decimal128, decimal128), + UNARY_SAFE_NULL_IF_NULL(truncate, {"trunc"}, decimal128, decimal128), + BINARY_GENERIC_SAFE_NULL_IF_NULL(round, {}, decimal128, int32, decimal128), + BINARY_GENERIC_SAFE_NULL_IF_NULL(truncate, {"trunc"}, decimal128, int32, + decimal128), }; return math_fn_registry_; diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc index 19e31c8d5c0..bd2fe18ccc5 100644 --- a/cpp/src/gandiva/function_registry_string.cc +++ b/cpp/src/gandiva/function_registry_string.cc @@ -20,49 +20,55 @@ namespace gandiva { -#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(name) \ - VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name) +#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(name, ALIASES) \ + VAR_LEN_TYPES(BINARY_RELATIONAL_SAFE_NULL_IF_NULL, name, ALIASES) -#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(name) \ - BINARY_RELATIONAL_SAFE_NULL_IF_NULL(name, utf8) +#define BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(name, ALIASES) \ + BINARY_RELATIONAL_SAFE_NULL_IF_NULL(name, ALIASES, utf8) -#define UNARY_OCTET_LEN_FN(name) \ - UNARY_SAFE_NULL_IF_NULL(name, utf8, int32), UNARY_SAFE_NULL_IF_NULL(name, binary, int32) +#define UNARY_OCTET_LEN_FN(name, ALIASES) \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, utf8, int32), \ + UNARY_SAFE_NULL_IF_NULL(name, ALIASES, binary, int32) -#define UNARY_SAFE_NULL_NEVER_BOOL_FN(name) \ - VAR_LEN_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, name) +#define UNARY_SAFE_NULL_NEVER_BOOL_FN(name, ALIASES) \ + VAR_LEN_TYPES(UNARY_SAFE_NULL_NEVER_BOOL, name, ALIASES) std::vector GetStringFunctionRegistry() { static std::vector string_fn_registry_ = { - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(equal), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(not_equal), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than_or_equal_to), - 
BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than_or_equal_to), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(equal, {}), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(not_equal, {}), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than, {}), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(less_than_or_equal_to, {}), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than, {}), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_FN(greater_than_or_equal_to, {}), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(starts_with), - BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(ends_with), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(starts_with, {}), + BINARY_RELATIONAL_SAFE_NULL_IF_NULL_UTF8_FN(ends_with, {}), - UNARY_OCTET_LEN_FN(octet_length), - UNARY_OCTET_LEN_FN(bit_length), + UNARY_OCTET_LEN_FN(octet_length, {}), + UNARY_OCTET_LEN_FN(bit_length, {}), - UNARY_UNSAFE_NULL_IF_NULL(char_length, utf8, int32), - UNARY_UNSAFE_NULL_IF_NULL(length, utf8, int32), - UNARY_UNSAFE_NULL_IF_NULL(lengthUtf8, binary, int32), + UNARY_UNSAFE_NULL_IF_NULL(char_length, {}, utf8, int32), + UNARY_UNSAFE_NULL_IF_NULL(length, {}, utf8, int32), + UNARY_UNSAFE_NULL_IF_NULL(lengthUtf8, {}, binary, int32), - UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull), - UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull), + UNARY_SAFE_NULL_NEVER_BOOL_FN(isnull, {}), + UNARY_SAFE_NULL_NEVER_BOOL_FN(isnotnull, {}), - NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull, + NativeFunction("upper", {}, DataTypeVector{utf8()}, utf8(), kResultNullIfNull, "upper_utf8", NativeFunction::kNeedsContext), - NativeFunction("castVARCHAR", DataTypeVector{utf8(), int64()}, utf8(), + NativeFunction("castVARCHAR", {}, DataTypeVector{utf8(), int64()}, utf8(), kResultNullIfNull, "castVARCHAR_utf8_int64", NativeFunction::kNeedsContext), - NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull, - "gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder)}; + NativeFunction("castVARCHAR", {}, DataTypeVector{decimal128(), int64()}, utf8(), + kResultNullIfNull, "castVARCHAR_decimal128_int64", + NativeFunction::kNeedsContext), + + NativeFunction("like", {}, DataTypeVector{utf8(), utf8()}, boolean(), + kResultNullIfNull, "gdv_fn_like_utf8_utf8", + NativeFunction::kNeedsFunctionHolder)}; return string_fn_registry_; } diff --git a/cpp/src/gandiva/function_registry_test.cc b/cpp/src/gandiva/function_registry_test.cc index 247d13ea46d..6d96d795087 100644 --- a/cpp/src/gandiva/function_registry_test.cc +++ b/cpp/src/gandiva/function_registry_test.cc @@ -17,6 +17,7 @@ #include "gandiva/function_registry.h" +#include #include namespace gandiva { @@ -31,7 +32,7 @@ TEST_F(TestFunctionRegistry, TestFound) { const NativeFunction* function = registry_.LookupSignature(add_i32_i32); EXPECT_NE(function, nullptr); - EXPECT_EQ(function->signature(), add_i32_i32); + EXPECT_THAT(function->signatures(), testing::Contains(add_i32_i32)); EXPECT_EQ(function->pc_name(), "add_int32_int32"); } diff --git a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc index 758721246d3..b9d41eaa7d9 100644 --- a/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc +++ b/cpp/src/gandiva/function_registry_timestamp_arithmetic.cc @@ -20,60 +20,60 @@ namespace gandiva { -#define TIMESTAMP_ADD_FNS(name) \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, date64, date64), \ - 
BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64) +#define TIMESTAMP_ADD_FNS(name, ALIASES) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int32, timestamp, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int32, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int64, timestamp, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int64, date64, date64) -#define TIMESTAMP_DIFF_FN(name) \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, timestamp, int32) +#define TIMESTAMP_DIFF_FN(name, ALIASES) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, timestamp, timestamp, int32) -#define DATE_ADD_FNS(name) \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int32, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int32, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, date64, int64, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, timestamp, int64, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, date64, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, timestamp), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, timestamp) +#define DATE_ADD_FNS(name, ALIASES) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, date64, int32, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, timestamp, int32, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, date64, int64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, timestamp, int64, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int32, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int32, timestamp, timestamp), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int64, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int64, timestamp, timestamp) -#define DATE_DIFF_FNS(name) \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, date64, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int32, timestamp, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, date64, date64), \ - BINARY_GENERIC_SAFE_NULL_IF_NULL(name, int64, timestamp, date64) +#define DATE_DIFF_FNS(name, ALIASES) \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int32, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int32, timestamp, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int64, date64, date64), \ + BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, int64, timestamp, date64) std::vector GetDateTimeArithmeticFunctionRegistry() { static std::vector datetime_fn_registry_ = { - BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64), - BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, float64), + BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, {}, date64, date64, float64), + BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, {}, timestamp, timestamp, float64), - TIMESTAMP_DIFF_FN(timestampdiffSecond), - TIMESTAMP_DIFF_FN(timestampdiffMinute), - TIMESTAMP_DIFF_FN(timestampdiffHour), - TIMESTAMP_DIFF_FN(timestampdiffDay), - TIMESTAMP_DIFF_FN(timestampdiffWeek), - TIMESTAMP_DIFF_FN(timestampdiffMonth), - TIMESTAMP_DIFF_FN(timestampdiffQuarter), - TIMESTAMP_DIFF_FN(timestampdiffYear), + TIMESTAMP_DIFF_FN(timestampdiffSecond, {}), + TIMESTAMP_DIFF_FN(timestampdiffMinute, {}), + TIMESTAMP_DIFF_FN(timestampdiffHour, {}), + TIMESTAMP_DIFF_FN(timestampdiffDay, 
{}),
+      TIMESTAMP_DIFF_FN(timestampdiffWeek, {}),
+      TIMESTAMP_DIFF_FN(timestampdiffMonth, {}),
+      TIMESTAMP_DIFF_FN(timestampdiffQuarter, {}),
+      TIMESTAMP_DIFF_FN(timestampdiffYear, {}),
 
-      TIMESTAMP_ADD_FNS(timestampaddSecond),
-      TIMESTAMP_ADD_FNS(timestampaddMinute),
-      TIMESTAMP_ADD_FNS(timestampaddHour),
-      TIMESTAMP_ADD_FNS(timestampaddDay),
-      TIMESTAMP_ADD_FNS(timestampaddWeek),
-      TIMESTAMP_ADD_FNS(timestampaddMonth),
-      TIMESTAMP_ADD_FNS(timestampaddQuarter),
-      TIMESTAMP_ADD_FNS(timestampaddYear),
+      TIMESTAMP_ADD_FNS(timestampaddSecond, {}),
+      TIMESTAMP_ADD_FNS(timestampaddMinute, {}),
+      TIMESTAMP_ADD_FNS(timestampaddHour, {}),
+      TIMESTAMP_ADD_FNS(timestampaddDay, {}),
+      TIMESTAMP_ADD_FNS(timestampaddWeek, {}),
+      TIMESTAMP_ADD_FNS(timestampaddMonth, {}),
+      TIMESTAMP_ADD_FNS(timestampaddQuarter, {}),
+      TIMESTAMP_ADD_FNS(timestampaddYear, {}),
 
-      DATE_ADD_FNS(date_add),
-      DATE_ADD_FNS(add),
+      DATE_ADD_FNS(date_add, {}),
+      DATE_ADD_FNS(add, {}),
 
-      DATE_DIFF_FNS(date_sub),
-      DATE_DIFF_FNS(subtract),
-      DATE_DIFF_FNS(date_diff)};
+      DATE_DIFF_FNS(date_sub, {}),
+      DATE_DIFF_FNS(subtract, {}),
+      DATE_DIFF_FNS(date_diff, {})};
 
   return datetime_fn_registry_;
 }
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 5eacdf769d0..08d12d05f50 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -72,6 +72,61 @@ bool gdv_fn_in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len,
       reinterpret_cast<gandiva::InHolder<std::string>*>(ptr);
   return holder->HasValue(std::string(data, data_len));
 }
+
+int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr,
+                                      int32_t* offsets, int64_t slot,
+                                      const char* entry_buf, int32_t entry_len) {
+  auto buffer = reinterpret_cast<arrow::ResizableBuffer*>(data_ptr);
+  int32_t offset = static_cast<int32_t>(buffer->size());
+
+  // This also sets the size in the buffer.
+  auto status = buffer->Resize(offset + entry_len, false /*shrink*/);
+  if (!status.ok()) {
+    gandiva::ExecutionContext* context =
+        reinterpret_cast<gandiva::ExecutionContext*>(context_ptr);
+
+    context->set_error_msg(status.message().c_str());
+    return -1;
+  }
+
+  // append the new entry.
+  memcpy(buffer->mutable_data() + offset, entry_buf, entry_len);
+
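A note on the two offset stores that follow: entries are populated in slot order, so offsets[slot] holds the data-buffer size before this entry was appended and offsets[slot + 1] the size after it. The pair delimits the new value and keeps consecutive entries contiguous in the data buffer.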
+  // update offsets buffer.
+  offsets[slot] = offset;
+  offsets[slot + 1] = offset + entry_len;
+  return 0;
+}
+
+int32_t gdv_fn_dec_from_string(int64_t context, const char* in, int32_t in_length,
+                               int32_t* precision_from_str, int32_t* scale_from_str,
+                               int64_t* dec_high_from_str, uint64_t* dec_low_from_str) {
+  arrow::Decimal128 dec;
+  auto status = arrow::Decimal128::FromString(std::string(in, in_length), &dec,
+                                              precision_from_str, scale_from_str);
+  if (!status.ok()) {
+    gdv_fn_context_set_error_msg(context, status.message().data());
+    return -1;
+  }
+  *dec_high_from_str = dec.high_bits();
+  *dec_low_from_str = dec.low_bits();
+  return 0;
+}
+
+char* gdv_fn_dec_to_string(int64_t context, int64_t x_high, uint64_t x_low,
+                           int32_t x_scale, int32_t* dec_str_len) {
+  arrow::Decimal128 dec(arrow::BasicDecimal128(x_high, x_low));
+  std::string dec_str = dec.ToString(x_scale);
+  *dec_str_len = static_cast<int32_t>(dec_str.length());
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *dec_str_len));
+  if (ret == nullptr) {
+    std::string err_msg = "Could not allocate memory for string: " + dec_str;
+    gdv_fn_context_set_error_msg(context, err_msg.data());
+    return nullptr;
+  }
+  memcpy(ret, dec_str.data(), *dec_str_len);
+  return ret;
+}
 }
 
 namespace gandiva {
@@ -80,6 +135,34 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   std::vector<llvm::Type*> args;
   auto types = engine->types();
 
+  // gdv_fn_dec_from_string
+  args = {
+      types->i64_type(),      // context
+      types->i8_ptr_type(),   // const char* in
+      types->i32_type(),      // int32_t in_length
+      types->i32_ptr_type(),  // int32_t* precision_from_str
+      types->i32_ptr_type(),  // int32_t* scale_from_str
+      types->i64_ptr_type(),  // int64_t* dec_high_from_str
+      types->i64_ptr_type(),  // uint64_t* dec_low_from_str
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_dec_from_string",
+                                  types->i32_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_dec_from_string));
+
+  // gdv_fn_dec_to_string
+  args = {
+      types->i64_type(),      // context
+      types->i64_type(),      // int64_t x_high
+      types->i64_type(),      // uint64_t x_low
+      types->i32_type(),      // int32_t x_scale
+      types->i32_ptr_type(),  // int32_t* dec_str_len
+  };
+
+  engine->AddGlobalMappingForFunc("gdv_fn_dec_to_string",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_dec_to_string));
+
   // gdv_fn_like_utf8_utf8
   args = {types->i64_type(),     // int64_t ptr
           types->i8_ptr_type(),  // const char* data
@@ -135,6 +218,18 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
   engine->AddGlobalMappingForFunc("gdv_fn_in_expr_lookup_utf8",
                                   types->i1_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_in_expr_lookup_utf8));
+
+  // gdv_fn_populate_varlen_vector
+  args = {types->i64_type(),      // int64_t execution_context
+          types->i8_ptr_type(),   // int8_t* data ptr
+          types->i32_ptr_type(),  // int32_t* offsets ptr
+          types->i64_type(),      // int64_t slot
+          types->i8_ptr_type(),   // const char* entry_buf
+          types->i32_type()};     // int32_t entry_len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_populate_varlen_vector",
+                                  types->i32_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_populate_varlen_vector));
 }
 
 }  // namespace gandiva
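The two decimal string stubs give generated code a way to parse and print Decimal128 values as (high, low) 64-bit halves plus precision and scale. A rough sketch of a direct call, assuming the stubs are linked in; in the engine they are only reached from generated IR with a real ExecutionContext behind the context handle, so passing 0 here is tolerable only because the happy path never dereferences it:

```cpp
#include <cstdint>

#include "gandiva/gdv_function_stubs.h"

void DecimalFromStringSketch() {
  int32_t precision = 0, scale = 0;
  int64_t high = 0;
  uint64_t low = 0;
  // Hypothetical direct call; context=0 stands in for an ExecutionContext*.
  if (gdv_fn_dec_from_string(/*context=*/0, "12.345", /*in_length=*/6,
                             &precision, &scale, &high, &low) == 0) {
    // Expect precision == 5, scale == 3, and (high, low) holding the
    // unscaled 128-bit value 12345.
  }
}
```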
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index 8f940cee0f4..fcdf7d6ac66 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -46,6 +46,13 @@ bool in_expr_lookup_utf8(int64_t ptr, const char* data, int data_len, bool in_va
 
 int gdv_fn_time_with_zone(int* time_fields, const char* zone, int zone_len,
                           int64_t* ret_time);
+
+int32_t gdv_fn_dec_from_string(int64_t context, const char* in, int32_t in_length,
+                               int32_t* precision_from_str, int32_t* scale_from_str,
+                               int64_t* dec_high_from_str, uint64_t* dec_low_from_str);
+
+char* gdv_fn_dec_to_string(int64_t context, int64_t x_high, uint64_t x_low,
+                           int32_t x_scale, int32_t* dec_str_len);
 }
 
 #endif  // GDV_FUNCTION_STUBS_H
diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc
index 09d27398b67..eeaaca798de 100644
--- a/cpp/src/gandiva/jni/jni_common.cc
+++ b/cpp/src/gandiva/jni/jni_common.cc
@@ -72,6 +72,11 @@ jclass configuration_builder_class_;
 
 // refs for self.
 static jclass gandiva_exception_;
+static jclass vector_expander_class_;
+static jclass vector_expander_ret_class_;
+static jmethodID vector_expander_method_;
+static jfieldID vector_expander_ret_address_;
+static jfieldID vector_expander_ret_capacity_;
 
 // module maps
 gandiva::IdToModuleMap<std::shared_ptr<ProjectorHolder>> projector_modules_;
@@ -91,10 +96,27 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) {
   jclass localExceptionClass =
       env->FindClass("org/apache/arrow/gandiva/exceptions/GandivaException");
   gandiva_exception_ = (jclass)env->NewGlobalRef(localExceptionClass);
+  env->ExceptionDescribe();
   env->DeleteLocalRef(localExceptionClass);
-  env->ExceptionDescribe();
 
+  jclass local_expander_class =
+      env->FindClass("org/apache/arrow/gandiva/evaluator/VectorExpander");
+  vector_expander_class_ = (jclass)env->NewGlobalRef(local_expander_class);
+  env->DeleteLocalRef(local_expander_class);
+
+  vector_expander_method_ = env->GetMethodID(
+      vector_expander_class_, "expandOutputVectorAtIndex",
+      "(II)Lorg/apache/arrow/gandiva/evaluator/VectorExpander$ExpandResult;");
+
+  jclass local_expander_ret_class =
+      env->FindClass("org/apache/arrow/gandiva/evaluator/VectorExpander$ExpandResult");
+  vector_expander_ret_class_ = (jclass)env->NewGlobalRef(local_expander_ret_class);
+  env->DeleteLocalRef(local_expander_ret_class);
+  vector_expander_ret_address_ =
+      env->GetFieldID(vector_expander_ret_class_, "address", "J");
+  vector_expander_ret_capacity_ =
+      env->GetFieldID(vector_expander_ret_class_, "capacity", "I");
   return JNI_VERSION;
 }
 
@@ -103,6 +125,8 @@ void JNI_OnUnload(JavaVM* vm, void* reserved) {
   vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION);
   env->DeleteGlobalRef(configuration_builder_class_);
   env->DeleteGlobalRef(gandiva_exception_);
+  env->DeleteGlobalRef(vector_expander_class_);
+  env->DeleteGlobalRef(vector_expander_ret_class_);
 }
 
 DataTypePtr ProtoTypeToTime32(const types::ExtGandivaType& ext_type) {
@@ -632,6 +656,67 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_JniWrapper_build
   return module_id;
 }
 
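The class defined next is the C++ half of the expansion protocol: when generated code outgrows a Java-allocated data buffer, Resize calls back into VectorExpander.expandOutputVectorAtIndex and adopts whatever (possibly relocated) allocation Java returns. A conceptual usage sketch; GrowVarlenOutput is a hypothetical helper, and value_buf/data_sz stand in for the values pulled from the JNI arrays:

```cpp
// Conceptual use inside jni_common.cc; the index selects which Java output
// vector the callback should expand.
arrow::Status GrowVarlenOutput(JNIEnv* env, jobject jexpander, uint8_t* value_buf,
                               jlong data_sz) {
  auto buf = std::make_shared<JavaResizableBuffer>(env, jexpander,
                                                   /*vector_idx=*/0, value_buf,
                                                   static_cast<int32_t>(data_sz));
  // Growing beyond the current capacity round-trips through Java and
  // re-points data_/capacity_ at the new allocation; shrinking and Reserve
  // are intentionally unsupported.
  return buf->Resize(data_sz * 2, /*shrink_to_fit=*/false);
}
```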
+///
+/// \brief Resizable buffer which resizes by doing a callback into java.
+///
+class JavaResizableBuffer : public arrow::ResizableBuffer {
+ public:
+  JavaResizableBuffer(JNIEnv* env, jobject jexpander, int32_t vector_idx,
+                      uint8_t* buffer, int32_t len)
+      : ResizableBuffer(buffer, len),
+        env_(env),
+        jexpander_(jexpander),
+        vector_idx_(vector_idx) {
+    size_ = 0;
+  }
+
+  Status Resize(const int64_t new_size, bool shrink_to_fit) override;
+
+  Status Reserve(const int64_t new_capacity) override {
+    return Status::NotImplemented("reserve not implemented");
+  }
+
+ private:
+  JNIEnv* env_;
+  jobject jexpander_;
+  int32_t vector_idx_;
+};
+
+Status JavaResizableBuffer::Resize(const int64_t new_size, bool shrink_to_fit) {
+  if (shrink_to_fit == true) {
+    return Status::NotImplemented("shrink not implemented");
+  }
+
+  if (ARROW_PREDICT_TRUE(new_size < capacity())) {
+    // no need to expand.
+    size_ = new_size;
+    return Status::OK();
+  }
+
+  if (new_size > INT32_MAX) {
+    return Status::OutOfMemory("java supports buffer sizes up to 2GB only");
+  }
+
+  // callback into java to expand the buffer
+  int32_t updated_capacity = static_cast<int32_t>(new_size);
+  jobject ret = env_->CallObjectMethod(jexpander_, vector_expander_method_, vector_idx_,
+                                       updated_capacity);
+  if (env_->ExceptionCheck()) {
+    env_->ExceptionDescribe();
+    env_->ExceptionClear();
+    return Status::OutOfMemory("buffer expand failed in java");
+  }
+
+  jlong ret_address = env_->GetLongField(ret, vector_expander_ret_address_);
+  jint ret_capacity = env_->GetIntField(ret, vector_expander_ret_capacity_);
+  DCHECK_GE(ret_capacity, updated_capacity);
+
+  data_ = mutable_data_ = reinterpret_cast<uint8_t*>(ret_address);
+  size_ = new_size;
+  capacity_ = ret_capacity;
+  return Status::OK();
+}
+
 #define CHECK_OUT_BUFFER_IDX_AND_BREAK(idx, len)                               \
   if (idx >= len) {                                                            \
     status = gandiva::Status::Invalid("insufficient number of out_buf_addrs"); \
@@ -640,9 +725,10 @@
 JNIEXPORT void JNICALL
 Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector(
-    JNIEnv* env, jobject cls, jlong module_id, jint num_rows, jlongArray buf_addrs,
-    jlongArray buf_sizes, jint sel_vec_type, jint sel_vec_rows, jlong sel_vec_addr,
-    jlong sel_vec_size, jlongArray out_buf_addrs, jlongArray out_buf_sizes) {
+    JNIEnv* env, jobject object, jobject jexpander, jlong module_id, jint num_rows,
+    jlongArray buf_addrs, jlongArray buf_sizes, jint sel_vec_type, jint sel_vec_rows,
+    jlong sel_vec_addr, jlong sel_vec_size, jlongArray out_buf_addrs,
+    jlongArray out_buf_sizes) {
   Status status;
   std::shared_ptr<ProjectorHolder> holder = projector_modules_.Lookup(module_id);
   if (holder == nullptr) {
@@ -709,22 +795,45 @@ Java_org_apache_arrow_gandiva_evaluator_JniWrapper_evaluateProjector(
     ArrayDataVector output;
     int buf_idx = 0;
     int sz_idx = 0;
+    int output_vector_idx = 0;
 
     for (FieldPtr field : ret_types) {
+      std::vector<std::shared_ptr<arrow::Buffer>> buffers;
+
       CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len);
       uint8_t* validity_buf = reinterpret_cast<uint8_t*>(out_bufs[buf_idx++]);
       jlong bitmap_sz = out_sizes[sz_idx++];
-      std::shared_ptr<arrow::Buffer> bitmap_buf =
-          std::make_shared<arrow::Buffer>(validity_buf, bitmap_sz);
+      buffers.push_back(std::make_shared<arrow::Buffer>(validity_buf, bitmap_sz));
+
+      if (arrow::is_binary_like(field->type()->id())) {
+        CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len);
+        uint8_t* offsets_buf = reinterpret_cast<uint8_t*>(out_bufs[buf_idx++]);
+        jlong offsets_sz = out_sizes[sz_idx++];
+        buffers.push_back(
+            std::make_shared<arrow::Buffer>(offsets_buf, offsets_sz));
+      }
 
       CHECK_OUT_BUFFER_IDX_AND_BREAK(buf_idx, out_bufs_len);
          reinterpret_cast<uint8_t*>(out_bufs[buf_idx++]);
       jlong data_sz = out_sizes[sz_idx++];
-      std::shared_ptr<arrow::Buffer> data_buf =
-          std::make_shared<arrow::MutableBuffer>(value_buf, data_sz);
+      if (arrow::is_binary_like(field->type()->id())) {
+        if (jexpander == nullptr) {
+          status = Status::Invalid(
+              "expression has variable len output columns, but the expander object is "
+              "null");
+          break;
+        }
+        buffers.push_back(std::make_shared<JavaResizableBuffer>(
+            env, jexpander, output_vector_idx, value_buf, data_sz));
+      } else {
+        buffers.push_back(std::make_shared<arrow::MutableBuffer>(value_buf, data_sz));
+      }

-      auto array_data =
-          arrow::ArrayData::Make(field->type(), output_row_count, {bitmap_buf, data_buf});
+      auto array_data = arrow::ArrayData::Make(field->type(), output_row_count, buffers);
       output.push_back(array_data);
+      ++output_vector_idx;
+    }
+    if (!status.ok()) {
+      break;
     }
     status = holder->projector()->Evaluate(*in_batch, selection_vector.get(), output);
   } while (0);
diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc
index 1d5946dec80..57f67cb83dd 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -25,6 +25,7 @@
 #include

 #include "gandiva/bitmap_accumulator.h"
+#include "gandiva/decimal_ir.h"
 #include "gandiva/dex.h"
 #include "gandiva/expr_decomposer.h"
 #include "gandiva/expression.h"
@@ -155,6 +156,14 @@ llvm::Value* LLVMGenerator::GetValidityReference(llvm::Value* arg_addrs, int idx
   return ir_builder()->CreateIntToPtr(load, types()->i64_ptr_type(), name + "_varray");
 }

+/// Get reference to the data buffer pointer at specified index in the args list.
+llvm::Value* LLVMGenerator::GetDataBufferPtrReference(llvm::Value* arg_addrs, int idx,
+                                                      FieldPtr field) {
+  const std::string& name = field->name();
+  llvm::Value* load = LoadVectorAtIndex(arg_addrs, idx, name);
+  return ir_builder()->CreateIntToPtr(load, types()->i8_ptr_type(), name + "_buf_ptr");
+}
+
 /// Get reference to data array at specified index in the args list.
 llvm::Value* LLVMGenerator::GetDataReference(llvm::Value* arg_addrs, int idx,
                                              FieldPtr field) {
@@ -293,6 +302,10 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, FieldDescriptorPtr out
   builder->SetInsertPoint(loop_entry);
   llvm::Value* output_ref =
       GetDataReference(arg_addrs, output->data_idx(), output->field());
+  llvm::Value* output_buffer_ptr_ref = GetDataBufferPtrReference(
+      arg_addrs, output->data_buffer_ptr_idx(), output->field());
+  llvm::Value* output_offset_ref =
+      GetOffsetsReference(arg_addrs, output->offsets_idx(), output->field());

   // Loop body
   builder->SetInsertPoint(loop_body);
@@ -323,6 +336,7 @@

   // save the value in the output vector.
   builder->SetInsertPoint(loop_body_tail);
+
   auto output_type_id = output->Type()->id();
   if (output_type_id == arrow::Type::BOOL) {
     SetPackedBitValue(output_ref, loop_var, output_value->data());
@@ -330,6 +344,13 @@
              output_type_id == arrow::Type::DECIMAL) {
     llvm::Value* slot_offset = builder->CreateGEP(output_ref, loop_var);
     builder->CreateStore(output_value->data(), slot_offset);
+  } else if (arrow::is_binary_like(output_type_id)) {
+    // Var-len output. Make a function call to populate the data. If there is an
+    // error, the function sets it in the context, and it is returned at the end of
+    // this row batch.
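+    // The helper's C signature is assumed (inferred from the argument list built
+    // here and the i32 return type) to be roughly:
+    //   int32_t gdv_fn_populate_varlen_vector(int64_t context, uint8_t* data_ptr,
+    //                                         int32_t* offsets, int64_t slot,
+    //                                         const char* value, int32_t len);
+    // i.e. it appends 'len' bytes at offsets[slot] and writes offsets[slot + 1].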
+ AddFunctionCall("gdv_fn_populate_varlen_vector", types()->i32_type(), + {arg_context_ptr, output_buffer_ptr_ref, output_offset_ref, loop_var, + output_value->data(), output_value->length()}); } else { return Status::NotImplemented("output type ", output->Type()->ToString(), " not supported"); @@ -1092,6 +1113,7 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, auto types = generator_->types(); auto arrow_return_type_id = arrow_return_type->id(); auto llvm_return_type = types->IRType(arrow_return_type_id); + DecimalIR decimalIR(generator_->engine_.get()); if (arrow_return_type_id == arrow::Type::DECIMAL) { // For decimal fns, the output precision/scale are passed along as parameters. @@ -1107,10 +1129,16 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, params->push_back(ret_lvalue->scale()); // Make the function call - auto out = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); + auto out = decimalIR.CallDecimalFunction(func->pc_name(), llvm_return_type, *params); ret_lvalue->set_data(out); return std::move(ret_lvalue); } else { + bool isDecimalFunction = false; + for (auto& arg : *params) { + if (arg->getType() == types->i128_type()) { + isDecimalFunction = true; + } + } // add extra arg for return length for variable len return types (alloced on stack). llvm::AllocaInst* result_len_ptr = nullptr; if (arrow::is_binary_like(arrow_return_type_id)) { @@ -1122,7 +1150,10 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, // Make the function call llvm::IRBuilder<>* builder = ir_builder(); - auto value = generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); + auto value = + isDecimalFunction + ? decimalIR.CallDecimalFunction(func->pc_name(), llvm_return_type, *params) + : generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); auto value_len = (result_len_ptr == nullptr) ? nullptr : builder->CreateLoad(result_len_ptr); return std::make_shared(value, value_len); diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index a68f0d518e9..122eaf6243a 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -180,6 +180,9 @@ class GANDIVA_EXPORT LLVMGenerator { /// Generate code to load the vector at specified index and cast it as offsets array. llvm::Value* GetOffsetsReference(llvm::Value* arg_addrs, int idx, FieldPtr field); + /// Generate code to load the vector at specified index and cast it as buffer pointer. + llvm::Value* GetDataBufferPtrReference(llvm::Value* arg_addrs, int idx, FieldPtr field); + /// Generate code for the value array of one expression. 
 Status CodeGenExprValue(DexPtr value_expr, FieldDescriptorPtr output, int suffix_idx,
                         llvm::Function** fn,
diff --git a/cpp/src/gandiva/native_function.h b/cpp/src/gandiva/native_function.h
index 82714c7de9f..540e07bde4c 100644
--- a/cpp/src/gandiva/native_function.h
+++ b/cpp/src/gandiva/native_function.h
@@ -45,7 +45,7 @@ class GANDIVA_EXPORT NativeFunction {
   static constexpr int32_t kNeedsFunctionHolder = (1 << 2);
   static constexpr int32_t kCanReturnErrors = (1 << 3);

-  const FunctionSignature& signature() const { return signature_; }
+  const std::vector<FunctionSignature>& signatures() const { return signatures_; }
   std::string pc_name() const { return pc_name_; }

   ResultNullableType result_nullable_type() const { return result_nullable_type_; }
@@ -53,16 +53,22 @@
   bool NeedsFunctionHolder() const { return (flags_ & kNeedsFunctionHolder) != 0; }
   bool CanReturnErrors() const { return (flags_ & kCanReturnErrors) != 0; }

-  NativeFunction(const std::string& base_name, const DataTypeVector& param_types,
-                 DataTypePtr ret_type, const ResultNullableType& result_nullable_type,
+  NativeFunction(const std::string& base_name, const std::vector<std::string>& aliases,
+                 const DataTypeVector& param_types, DataTypePtr ret_type,
+                 const ResultNullableType& result_nullable_type,
                  const std::string& pc_name, int32_t flags = 0)
-      : signature_(base_name, param_types, ret_type),
+      : signatures_(),
        flags_(flags),
        result_nullable_type_(result_nullable_type),
-        pc_name_(pc_name) {}
+        pc_name_(pc_name) {
+    signatures_.push_back(FunctionSignature(base_name, param_types, ret_type));
+    for (auto& func_name : aliases) {
+      signatures_.push_back(FunctionSignature(func_name, param_types, ret_type));
+    }
+  }

 private:
-  FunctionSignature signature_;
+  std::vector<FunctionSignature> signatures_;

  /// attributes
  int32_t flags_;
diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops.cc b/cpp/src/gandiva/precompiled/arithmetic_ops.cc
index 950264025b8..9334e08ccf6 100644
--- a/cpp/src/gandiva/precompiled/arithmetic_ops.cc
+++ b/cpp/src/gandiva/precompiled/arithmetic_ops.cc
@@ -17,6 +17,7 @@

 extern "C" {

+#include <math.h>
 #include "./types.h"

 // Expand inner macro for all numeric types.
@@ -68,6 +69,15 @@ NUMERIC_TYPES(BINARY_SYMMETRIC, multiply, *)
 MOD_OP(mod, int64, int32, int32)
 MOD_OP(mod, int64, int64, int64)

+float64 mod_float64_float64(int64_t context, float64 x, float64 y) {
+  if (y == 0.0) {
+    char const* err_msg = "divide by zero error";
+    gdv_fn_context_set_error_msg(context, err_msg);
+    return 0.0;
+  }
+  return fmod(x, y);
+}
+
 // Relational binary fns : left, right params are same, return is bool.
#define BINARY_RELATIONAL(NAME, TYPE, OP) \ FORCE_INLINE \ @@ -170,4 +180,32 @@ NUMERIC_BOOL_DATE_FUNCTION(IS_NOT_DISTINCT_FROM) NUMERIC_FUNCTION(DIVIDE) +#define DIV(TYPE) \ + FORCE_INLINE \ + TYPE div_##TYPE##_##TYPE(int64 context, TYPE in1, TYPE in2) { \ + if (in2 == 0) { \ + char const* err_msg = "divide by zero error"; \ + gdv_fn_context_set_error_msg(context, err_msg); \ + return 0; \ + } \ + return static_cast(in1 / in2); \ + } + +DIV(int32) +DIV(int64) + +#define DIV_FLOAT(TYPE) \ + FORCE_INLINE \ + TYPE div_##TYPE##_##TYPE(int64 context, TYPE in1, TYPE in2) { \ + if (in2 == 0) { \ + char const* err_msg = "divide by zero error"; \ + gdv_fn_context_set_error_msg(context, err_msg); \ + return 0; \ + } \ + return static_cast(::trunc(in1 / in2)); \ + } + +DIV_FLOAT(float32) +DIV_FLOAT(float64) + } // extern "C" diff --git a/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc b/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc index e4f4ad89919..0375783fffb 100644 --- a/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc +++ b/cpp/src/gandiva/precompiled/arithmetic_ops_test.cc @@ -33,7 +33,30 @@ TEST(TestArithmeticOps, TestIsDistinctFrom) { EXPECT_EQ(is_not_distinct_from_int32_int32(1000, true, 1000, true), true); } -TEST(TestArithmeticOps, TestMod) { EXPECT_EQ(mod_int64_int32(10, 0), 10); } +TEST(TestArithmeticOps, TestMod) { + gandiva::ExecutionContext context; + EXPECT_EQ(mod_int64_int32(10, 0), 10); + + const double acceptable_abs_error = 0.00000000001; // 1e-10 + + EXPECT_DOUBLE_EQ(mod_float64_float64(reinterpret_cast(&context), 2.5, 0.0), 0.0); + EXPECT_TRUE(context.has_error()); + EXPECT_EQ(context.get_error(), "divide by zero error"); + + context.Reset(); + EXPECT_NEAR(mod_float64_float64(reinterpret_cast(&context), 2.5, 1.2), 0.1, + acceptable_abs_error); + EXPECT_FALSE(context.has_error()); + + context.Reset(); + EXPECT_DOUBLE_EQ(mod_float64_float64(reinterpret_cast(&context), 2.5, 2.5), 0.0); + EXPECT_FALSE(context.has_error()); + + context.Reset(); + EXPECT_NEAR(mod_float64_float64(reinterpret_cast(&context), 9.2, 3.7), 1.8, + acceptable_abs_error); + EXPECT_FALSE(context.has_error()); +} TEST(TestArithmeticOps, TestDivide) { gandiva::ExecutionContext context; @@ -46,4 +69,32 @@ TEST(TestArithmeticOps, TestDivide) { EXPECT_EQ(context.has_error(), false); } +TEST(TestArithmeticOps, TestDiv) { + gandiva::ExecutionContext context; + EXPECT_EQ(div_int64_int64(reinterpret_cast(&context), 101, 0), 0); + EXPECT_EQ(context.has_error(), true); + EXPECT_EQ(context.get_error(), "divide by zero error"); + context.Reset(); + + EXPECT_EQ(div_int64_int64(reinterpret_cast(&context), 101, 111), 0); + EXPECT_EQ(context.has_error(), false); + context.Reset(); + + EXPECT_EQ(div_float64_float64(reinterpret_cast(&context), 1010.1010, 2.1), + 481.0); + EXPECT_EQ(context.has_error(), false); + context.Reset(); + + EXPECT_EQ(div_float64_float64(reinterpret_cast(&context), 1010.1010, 0.00000), + 0.0); + EXPECT_EQ(context.has_error(), true); + EXPECT_EQ(context.get_error(), "divide by zero error"); + context.Reset(); + + EXPECT_EQ(div_float32_float32(reinterpret_cast(&context), 1010.1010f, 2.1f), + 481.0f); + EXPECT_EQ(context.has_error(), false); + context.Reset(); +} + } // namespace gandiva diff --git a/cpp/src/gandiva/precompiled/decimal_ops.cc b/cpp/src/gandiva/precompiled/decimal_ops.cc index 6f20f59b581..902c8d799ae 100644 --- a/cpp/src/gandiva/precompiled/decimal_ops.cc +++ b/cpp/src/gandiva/precompiled/decimal_ops.cc @@ -467,7 +467,7 @@ int32_t Compare(const BasicDecimalScalar128& x, const 
BasicDecimalScalar128& y)

 #define DECIMAL_OVERFLOW_IF(condition, overflow) \
   do {                                           \
-    if (condition) {                             \
+    if (*overflow || (condition)) {              \
       *overflow = true;                          \
       return 0;                                  \
     }                                            \
@@ -491,9 +491,9 @@ static std::array kDoubleScaleMultip
 BasicDecimal128 FromDouble(double in, int32_t precision, int32_t scale, bool* overflow) {
   // Multiply decimal with the scale
   auto unscaled = in * kDoubleScaleMultipliers[scale];
+  DECIMAL_OVERFLOW_IF(std::isnan(unscaled), overflow);
+
+  unscaled = std::round(unscaled);
-  DECIMAL_OVERFLOW_IF(std::isnan(unscaled) || std::fabs(unscaled) < std::fabs(in),
-                      overflow);

   // convert scaled double to int128
   int32_t sign = unscaled < 0 ? -1 : 1;
@@ -531,16 +531,6 @@ BasicDecimal128 FromInt64(int64_t in, int32_t precision, int32_t scale, bool* ov
   return in * BasicDecimal128::GetScaleMultiplier(scale);
 }

-int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow) {
-  BasicDecimal128 whole, fraction;
-
-  in.value().GetWholeAndFraction(in.scale(), &whole, &fraction);
-  DECIMAL_OVERFLOW_IF((whole > std::numeric_limits<int64_t>::max()) ||
-                          (whole < std::numeric_limits<int64_t>::min()),
-                      overflow);
-  return static_cast<int64_t>(whole.low_bits());
-}
-
 // Helper function to modify the scale and/or precision of a decimal value.
 static BasicDecimal128 ModifyScaleAndPrecision(const BasicDecimalScalar128& x,
                                                int32_t out_precision, int32_t out_scale,
@@ -702,5 +692,14 @@ BasicDecimal128 Convert(const BasicDecimalScalar128& x, int32_t out_precision,
                                     RoundType::kRoundTypeHalfRoundUp, overflow);
 }

+int64_t ToInt64(const BasicDecimalScalar128& in, bool* overflow) {
+  auto rounded = RoundWithPositiveScale(in, in.precision(), 0 /*scale*/,
+                                        RoundType::kRoundTypeHalfRoundUp, overflow);
+  DECIMAL_OVERFLOW_IF((rounded > std::numeric_limits<int64_t>::max()) ||
+                          (rounded < std::numeric_limits<int64_t>::min()),
+                      overflow);
+  return static_cast<int64_t>(rounded.low_bits());
+}
+
 }  // namespace decimalops
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/decimal_ops_test.cc b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
index b4fe2e6b2b7..290cc46946f 100644
--- a/cpp/src/gandiva/precompiled/decimal_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
@@ -872,6 +872,12 @@ TEST_F(TestDecimalSql, FromDouble) {
       std::make_tuple(DecimalScalar128{162850, 38, 4}, 16.285, false),
       std::make_tuple(DecimalScalar128{1629, 38, 2}, 16.285, false),

+      // round up
+      std::make_tuple(DecimalScalar128{1, 18, 0}, 1.15470053838, false),
+      std::make_tuple(DecimalScalar128{-1, 18, 0}, -1.15470053838, false),
+      std::make_tuple(DecimalScalar128{2, 18, 0}, 1.55470053838, false),
+      std::make_tuple(DecimalScalar128{-2, 18, 0}, -1.55470053838, false),
+
       // border cases
       std::make_tuple(DecimalScalar128{-kMaxDoubleInt, 38, 0},
                       static_cast<double>(-kMaxDoubleInt), false),
@@ -887,10 +893,16 @@
       std::make_tuple(DecimalScalar128{1230, 38, 33}, 1.23E-30, false),
       std::make_tuple(DecimalScalar128{123, 38, 38}, 1.23E-36, false),

-      // overflow due to very low double
+      // very small doubles
       std::make_tuple(DecimalScalar128{0, 0, 38, 0}, std::numeric_limits<double>::min(),
+                      false),
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, -std::numeric_limits<double>::min(),
+                      false),
+
+      // overflow due to large -ve double
+      std::make_tuple(DecimalScalar128{0, 0, 38, 0}, -std::numeric_limits<double>::max(),
                       true),
-      // overflow due to very high double
+      // overflow due to large +ve double
       std::make_tuple(DecimalScalar128{0, 0, 38, 0}, std::numeric_limits<double>::max(),
                       true),
       // overflow due to scaling up.
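The ToInt64 expectation changes in the next hunk follow directly from the rewrite above: the whole part is no longer truncated toward zero; the value is first rounded half-away-from-zero to scale 0 (kRoundTypeHalfRoundUp) and only then narrowed to int64. A minimal standalone sketch of that semantics, using double arithmetic purely for illustration (the real implementation stays in 128-bit integer math, and decimal_to_int64_sketch is a made-up name):

#include <cmath>
#include <cstdint>

// -16285 at scale 2 is -162.85 -> -163; at scale 4 it is -1.6285 -> -2.
int64_t decimal_to_int64_sketch(int64_t unscaled, int32_t scale) {
  double value = static_cast<double>(unscaled) / std::pow(10.0, scale);
  return std::llround(value);  // llround rounds halfway cases away from zero
}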
@@ -1009,8 +1021,8 @@ TEST_F(TestDecimalSql, ToInt64) { std::vector> test_values = { // simple ones std::make_tuple(-16, DecimalScalar128{-16285, 38, 3}, false), - std::make_tuple(-162, DecimalScalar128{-16285, 38, 2}, false), - std::make_tuple(-1, DecimalScalar128{-16285, 38, 4}, false), + std::make_tuple(-163, DecimalScalar128{-16285, 38, 2}, false), + std::make_tuple(-2, DecimalScalar128{-16285, 38, 4}, false), // border cases std::make_tuple(INT64_MIN, DecimalScalar128{INT64_MIN, 38, 0}, false), diff --git a/cpp/src/gandiva/precompiled/decimal_wrapper.cc b/cpp/src/gandiva/precompiled/decimal_wrapper.cc index 630fe8b1261..66ccb3e0ddb 100644 --- a/cpp/src/gandiva/precompiled/decimal_wrapper.cc +++ b/cpp/src/gandiva/precompiled/decimal_wrapper.cc @@ -35,12 +35,11 @@ void add_large_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_p } FORCE_INLINE -void multiply_internal_decimal128_decimal128(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - int64_t y_high, uint64_t y_low, - int32_t y_precision, int32_t y_scale, - int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void multiply_decimal128_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int64_t y_high, uint64_t y_low, + int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); bool overflow; @@ -53,10 +52,11 @@ void multiply_internal_decimal128_decimal128(int64_t x_high, uint64_t x_low, } FORCE_INLINE -void divide_internal_decimal128_decimal128( - int64_t context, int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, - int64_t y_high, uint64_t y_low, int32_t y_precision, int32_t y_scale, - int32_t out_precision, int32_t out_scale, int64_t* out_high, uint64_t* out_low) { +void divide_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, int64_t y_high, + uint64_t y_low, int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); bool overflow; @@ -69,12 +69,11 @@ void divide_internal_decimal128_decimal128( } FORCE_INLINE -void mod_internal_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - int64_t y_high, uint64_t y_low, - int32_t y_precision, int32_t y_scale, - int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void mod_decimal128_decimal128(int64_t context, int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, int64_t y_high, + uint64_t y_low, int32_t y_precision, int32_t y_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x(x_high, x_low, x_precision, x_scale); gandiva::BasicDecimalScalar128 y(y_high, y_low, y_precision, y_scale); bool overflow; @@ -87,7 +86,7 @@ void mod_internal_decimal128_decimal128(int64_t context, int64_t x_high, uint64_ } FORCE_INLINE -int32_t compare_internal_decimal128_decimal128(int64_t x_high, uint64_t x_low, +int32_t compare_decimal128_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, int64_t y_high, 
uint64_t y_low, int32_t y_precision, int32_t y_scale) { @@ -98,9 +97,9 @@ int32_t compare_internal_decimal128_decimal128(int64_t x_high, uint64_t x_low, } FORCE_INLINE -void abs_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void abs_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { gandiva::BasicDecimal128 x(x_high, x_low); x.Abs(); *out_high = x.high_bits(); @@ -108,9 +107,9 @@ void abs_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision } FORCE_INLINE -void ceil_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void ceil_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -120,9 +119,9 @@ void ceil_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precisio } FORCE_INLINE -void floor_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void floor_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -132,9 +131,9 @@ void floor_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precisi } FORCE_INLINE -void round_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void round_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -144,10 +143,10 @@ void round_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precisi } FORCE_INLINE -void round_decimal128_int32_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t rounding_scale, - int32_t out_precision, int32_t out_scale, - int64_t* out_high, uint64_t* out_low) { +void round_decimal128_int32(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t rounding_scale, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -157,10 +156,9 @@ void round_decimal128_int32_internal(int64_t x_high, uint64_t x_low, int32_t x_p } FORCE_INLINE -void truncate_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t out_precision, - int32_t out_scale, int64_t* out_high, - uint64_t* out_low) { +void truncate_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); 
bool overflow = false; @@ -170,11 +168,10 @@ void truncate_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_prec } FORCE_INLINE -void truncate_decimal128_int32_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - int32_t rounding_scale, int32_t out_precision, - int32_t out_scale, int64_t* out_high, - uint64_t* out_low) { +void truncate_decimal128_int32(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t rounding_scale, + int32_t out_precision, int32_t out_scale, + int64_t* out_high, uint64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -184,8 +181,8 @@ void truncate_decimal128_int32_internal(int64_t x_high, uint64_t x_low, } FORCE_INLINE -double castFLOAT8_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale) { +double castFLOAT8_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -193,8 +190,8 @@ double castFLOAT8_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_ } FORCE_INLINE -int64_t castBIGINT_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale) { +int64_t castBIGINT_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; @@ -202,8 +199,8 @@ int64_t castBIGINT_decimal128_internal(int64_t x_high, uint64_t x_low, } FORCE_INLINE -void castDECIMAL_int64_internal(int64_t in, int32_t x_precision, int32_t x_scale, - int64_t* out_high, uint64_t* out_low) { +void castDECIMAL_int64(int64_t in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { bool overflow = false; auto out = gandiva::decimalops::FromInt64(in, x_precision, x_scale, &overflow); *out_high = out.high_bits(); @@ -211,8 +208,14 @@ void castDECIMAL_int64_internal(int64_t in, int32_t x_precision, int32_t x_scale } FORCE_INLINE -void castDECIMAL_float64_internal(double in, int32_t x_precision, int32_t x_scale, - int64_t* out_high, uint64_t* out_low) { +void castDECIMAL_int32(int32_t in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { + castDECIMAL_int64(in, x_precision, x_scale, out_high, out_low); +} + +FORCE_INLINE +void castDECIMAL_float64(double in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { bool overflow = false; auto out = gandiva::decimalops::FromDouble(in, x_precision, x_scale, &overflow); *out_high = out.high_bits(); @@ -220,10 +223,15 @@ void castDECIMAL_float64_internal(double in, int32_t x_precision, int32_t x_scal } FORCE_INLINE -void castDECIMAL_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, int32_t out_precision, - int32_t out_scale, int64_t* out_high, - int64_t* out_low) { +void castDECIMAL_float32(float in, int32_t x_precision, int32_t x_scale, + int64_t* out_high, uint64_t* out_low) { + castDECIMAL_float64(in, x_precision, x_scale, out_high, out_low); +} + +FORCE_INLINE +void castDECIMAL_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, int32_t out_precision, int32_t out_scale, + int64_t* out_high, int64_t* out_low) { gandiva::BasicDecimalScalar128 x({x_high, x_low}, x_precision, x_scale); bool overflow = false; auto out = gandiva::decimalops::Convert(x, out_precision, 
out_scale, &overflow); @@ -232,32 +240,31 @@ void castDECIMAL_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_p } FORCE_INLINE -int32_t hash32_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, boolean x_isvalid) { +int32_t hash32_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return x_isvalid ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) : 0; } FORCE_INLINE -int32_t hash_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, boolean x_isvalid) { - return hash32_decimal128_internal(x_high, x_low, x_precision, x_scale, x_isvalid); +int32_t hash_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { + return hash32_decimal128(x_high, x_low, x_precision, x_scale, x_isvalid); } FORCE_INLINE -int64_t hash64_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, boolean x_isvalid) { +int64_t hash64_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return x_isvalid ? hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) : 0; } FORCE_INLINE -int32_t hash32WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - boolean x_isvalid, int32_t seed, - boolean seed_isvalid) { +int32_t hash32WithSeed_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid, int32_t seed, + boolean seed_isvalid) { if (!x_isvalid) { return seed; } @@ -265,10 +272,9 @@ int32_t hash32WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low, } FORCE_INLINE -int64_t hash64WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - boolean x_isvalid, int64_t seed, - boolean seed_isvalid) { +int64_t hash64WithSeed_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid, int64_t seed, + boolean seed_isvalid) { if (!x_isvalid) { return seed; } @@ -276,28 +282,26 @@ int64_t hash64WithSeed_decimal128_internal(int64_t x_high, uint64_t x_low, } FORCE_INLINE -int32_t hash32AsDouble_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - boolean x_isvalid) { +int32_t hash32AsDouble_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return x_isvalid ? hash32_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) : 0; } FORCE_INLINE -int64_t hash64AsDouble_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - boolean x_isvalid) { +int64_t hash64AsDouble_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return x_isvalid ? 
hash64_buf(gandiva::BasicDecimal128(x_high, x_low).ToBytes().data(), 16, 0) : 0; } FORCE_INLINE -int32_t hash32AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - boolean x_isvalid, int32_t seed, - boolean seed_isvalid) { +int32_t hash32AsDoubleWithSeed_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + boolean x_isvalid, int32_t seed, + boolean seed_isvalid) { if (!x_isvalid) { return seed; } @@ -305,10 +309,10 @@ int32_t hash32AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_lo } FORCE_INLINE -int64_t hash64AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_low, - int32_t x_precision, int32_t x_scale, - boolean x_isvalid, int64_t seed, - boolean seed_isvalid) { +int64_t hash64AsDoubleWithSeed_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + boolean x_isvalid, int64_t seed, + boolean seed_isvalid) { if (!x_isvalid) { return seed; } @@ -316,46 +320,84 @@ int64_t hash64AsDoubleWithSeed_decimal128_internal(int64_t x_high, uint64_t x_lo } FORCE_INLINE -boolean isnull_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, boolean x_isvalid) { +boolean isnull_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return !x_isvalid; } FORCE_INLINE -boolean isnotnull_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, boolean x_isvalid) { +boolean isnotnull_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return x_isvalid; } FORCE_INLINE -boolean isnumeric_decimal128_internal(int64_t x_high, uint64_t x_low, int32_t x_precision, - int32_t x_scale, boolean x_isvalid) { +boolean isnumeric_decimal128(int64_t x_high, uint64_t x_low, int32_t x_precision, + int32_t x_scale, boolean x_isvalid) { return x_isvalid; } FORCE_INLINE -boolean is_not_distinct_from_decimal128_decimal128_internal( - int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, - boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision, - int32_t y_scale, boolean y_isvalid) { +boolean is_not_distinct_from_decimal128_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + boolean x_isvalid, int64_t y_high, + uint64_t y_low, int32_t y_precision, + int32_t y_scale, boolean y_isvalid) { if (x_isvalid != y_isvalid) { return false; } if (!x_isvalid) { return true; } - return 0 == compare_internal_decimal128_decimal128(x_high, x_low, x_precision, x_scale, + return 0 == compare_decimal128_decimal128_internal(x_high, x_low, x_precision, x_scale, y_high, y_low, y_precision, y_scale); } FORCE_INLINE -boolean is_distinct_from_decimal128_decimal128_internal( - int64_t x_high, uint64_t x_low, int32_t x_precision, int32_t x_scale, - boolean x_isvalid, int64_t y_high, uint64_t y_low, int32_t y_precision, - int32_t y_scale, boolean y_isvalid) { - return !is_not_distinct_from_decimal128_decimal128_internal( - x_high, x_low, x_precision, x_scale, x_isvalid, y_high, y_low, y_precision, y_scale, - y_isvalid); +boolean is_distinct_from_decimal128_decimal128(int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + boolean x_isvalid, int64_t y_high, + uint64_t y_low, int32_t y_precision, + int32_t y_scale, boolean y_isvalid) { + return !is_not_distinct_from_decimal128_decimal128(x_high, x_low, x_precision, x_scale, + x_isvalid, y_high, y_low, + 
y_precision, y_scale, y_isvalid); +} + +FORCE_INLINE +void castDECIMAL_utf8(int64_t context, const char* in, int32_t in_length, + int32_t out_precision, int32_t out_scale, int64_t* out_high, + uint64_t* out_low) { + int64_t dec_high_from_str; + uint64_t dec_low_from_str; + int32_t precision_from_str; + int32_t scale_from_str; + int32_t status = + gdv_fn_dec_from_string(context, in, in_length, &precision_from_str, &scale_from_str, + &dec_high_from_str, &dec_low_from_str); + if (status != 0) { + return; + } + + gandiva::BasicDecimalScalar128 x({dec_high_from_str, dec_low_from_str}, + precision_from_str, scale_from_str); + bool overflow = false; + auto out = gandiva::decimalops::Convert(x, out_precision, out_scale, &overflow); + *out_high = out.high_bits(); + *out_low = out.low_bits(); +} + +FORCE_INLINE +char* castVARCHAR_decimal128_int64(int64_t context, int64_t x_high, uint64_t x_low, + int32_t x_precision, int32_t x_scale, + int64_t out_len_param, int32_t* out_length) { + int32_t full_dec_str_len; + char* dec_str = + gdv_fn_dec_to_string(context, x_high, x_low, x_scale, &full_dec_str_len); + int32_t trunc_dec_str_len = + out_len_param < full_dec_str_len ? out_len_param : full_dec_str_len; + *out_length = trunc_dec_str_len; + return dec_str; } } // extern "C" diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h index 493a3ae772d..397cf99ddf5 100644 --- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -121,9 +121,14 @@ double months_between_timestamp_timestamp(uint64, uint64); int32 mem_compare(const char* left, int32 left_len, const char* right, int32 right_len); int32 mod_int64_int32(int64 left, int32 right); +float64 mod_float64_float64(int64 context, float64 left, float64 right); int64 divide_int64_int64(int64 context, int64 in1, int64 in2); +int64 div_int64_int64(int64 context, int64 in1, int64 in2); +float32 div_float32_float32(int64 context, float32 in1, float32 in2); +float64 div_float64_float64(int64 context, float64 in1, float64 in2); + float64 cbrt_int32(int32); float64 cbrt_int64(int64); float64 cbrt_float32(float32); diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index 6493fd4d908..d8c6a80b52f 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -168,28 +168,50 @@ Status Projector::Evaluate(const arrow::RecordBatch& batch, return Status::OK(); } -// TODO : handle variable-len vectors +// TODO : handle complex vectors (list/map/..) Status Projector::AllocArrayData(const DataTypePtr& type, int64_t num_records, arrow::MemoryPool* pool, ArrayDataPtr* array_data) { - const auto* fw_type = dynamic_cast(type.get()); - ARROW_RETURN_IF(fw_type == nullptr, - Status::Invalid("Unsupported output data type ", type)); - - std::shared_ptr null_bitmap; - int64_t bitmap_bytes = arrow::BitUtil::BytesForBits(num_records); - ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, bitmap_bytes, &null_bitmap)); + arrow::Status astatus; + std::vector> buffers; + + // The output vector always has a null bitmap. + std::shared_ptr bitmap_buffer; + int64_t size = arrow::BitUtil::BytesForBits(num_records); + ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, size, &bitmap_buffer)); + buffers.push_back(bitmap_buffer); + + // String/Binary vectors have an offsets array. 
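+  // Layout note: there are (num_records + 1) 32-bit offsets, so the
+  // BytesForBits((num_records + 1) * 32) computed below is just that bit count
+  // rounded up to whole bytes, i.e. 4 * (num_records + 1) bytes.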
+ auto type_id = type->id(); + if (arrow::is_binary_like(type_id)) { + std::shared_ptr offsets_buffer; + auto offsets_len = arrow::BitUtil::BytesForBits((num_records + 1) * 32); + + ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, offsets_len, &offsets_buffer)); + buffers.push_back(offsets_buffer); + } - std::shared_ptr data; - int64_t data_len = arrow::BitUtil::BytesForBits(num_records * fw_type->bit_width()); - ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(pool, data_len, &data)); + // The output vector always has a data array. + int64_t data_len; + std::shared_ptr data_buffer; + if (arrow::is_primitive(type_id) || type_id == arrow::Type::DECIMAL) { + const auto& fw_type = dynamic_cast(*type); + data_len = arrow::BitUtil::BytesForBits(num_records * fw_type.bit_width()); + } else if (arrow::is_binary_like(type_id)) { + // we don't know the expected size for varlen output vectors. + data_len = 0; + } else { + return Status::Invalid("Unsupported output data type " + type->ToString()); + } + ARROW_RETURN_NOT_OK(arrow::AllocateResizableBuffer(pool, data_len, &data_buffer)); // This is not strictly required but valgrind gets confused and detects this // as uninitialized memory access. See arrow::util::SetBitTo(). if (type->id() == arrow::Type::BOOL) { - memset(data->mutable_data(), 0, data_len); + memset(data_buffer->mutable_data(), 0, data_len); } + buffers.push_back(data_buffer); - *array_data = arrow::ArrayData::Make(type, num_records, {null_bitmap, data}); + *array_data = arrow::ArrayData::Make(type, num_records, buffers); return Status::OK(); } @@ -213,13 +235,32 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data, ARROW_RETURN_IF(bitmap_len < min_bitmap_len, Status::Invalid("Bitmap buffer too small for ", field.name())); - // verify size of data buffer. - // TODO : handle variable-len vectors - const auto& fw_type = dynamic_cast(*field.type()); - int64_t min_data_len = arrow::BitUtil::BytesForBits(num_records * fw_type.bit_width()); - int64_t data_len = array_data.buffers[1]->capacity(); - ARROW_RETURN_IF(data_len < min_data_len, - Status::Invalid("Data buffer too small for ", field.name())); + auto type_id = field.type()->id(); + if (arrow::is_binary_like(type_id)) { + // validate size of offsets buffer. + int64_t min_offsets_len = arrow::BitUtil::BytesForBits((num_records + 1) * 32); + int64_t offsets_len = array_data.buffers[1]->capacity(); + ARROW_RETURN_IF( + offsets_len < min_offsets_len, + Status::Invalid("offsets buffer too small for ", field.name(), + " minimum required ", min_offsets_len, " actual ", offsets_len)); + + // check that it's resizable. + auto resizable = dynamic_cast(array_data.buffers[2].get()); + ARROW_RETURN_IF( + resizable == nullptr, + Status::Invalid("data buffer for varlen output vectors must be resizable")); + } else if (arrow::is_primitive(type_id) || type_id == arrow::Type::DECIMAL) { + // verify size of data buffer. 
+ const auto& fw_type = dynamic_cast(*field.type()); + int64_t min_data_len = + arrow::BitUtil::BytesForBits(num_records * fw_type.bit_width()); + int64_t data_len = array_data.buffers[1]->capacity(); + ARROW_RETURN_IF(data_len < min_data_len, + Status::Invalid("Data buffer too small for ", field.name())); + } else { + return Status::Invalid("Unsupported output data type " + field.type()->ToString()); + } return Status::OK(); } diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index 0aa09dfe3bd..ff2fbc7b38d 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -122,8 +122,8 @@ class GANDIVA_EXPORT Projector { const FieldVector& output_fields, std::shared_ptr); /// Allocate an ArrowData of length 'length'. - Status AllocArrayData(const DataTypePtr& type, int64_t length, arrow::MemoryPool* pool, - ArrayDataPtr* array_data); + Status AllocArrayData(const DataTypePtr& type, int64_t num_records, + arrow::MemoryPool* pool, ArrayDataPtr* array_data); /// Validate that the ArrayData has sufficient capacity to accomodate 'num_records'. Status ValidateArrayDataCapacity(const arrow::ArrayData& array_data, diff --git a/cpp/src/gandiva/symbols.map b/cpp/src/gandiva/symbols.map new file mode 100644 index 00000000000..77f00010682 --- /dev/null +++ b/cpp/src/gandiva/symbols.map @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{ + # Symbols marked as 'local' are not exported by the DSO and thus may not + # be used by client applications. + local: + # devtoolset / static-libstdc++ symbols + __cxa_*; + __once_proxy; + + extern "C++" { + # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically + # links c++11 symbols into binaries so that the result may be executed on + # a system with an older libstdc++ which doesn't include the necessary + # c++11 symbols. 
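+    #
+    # (A version script like this is consumed at link time, typically via
+    # -Wl,--version-script=symbols.map with GNU toolchains; the exact CMake
+    # wiring for the Gandiva shared library is assumed rather than shown here.)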
+    std::*;
+    *std::__once_call*;
+  };
+};
+
diff --git a/cpp/src/gandiva/tests/decimal_test.cc b/cpp/src/gandiva/tests/decimal_test.cc
index 9941feadff5..7e07c12e9f7 100644
--- a/cpp/src/gandiva/tests/decimal_test.cc
+++ b/cpp/src/gandiva/tests/decimal_test.cc
@@ -29,6 +29,7 @@

 using arrow::boolean;
 using arrow::Decimal128;
+using arrow::utf8;

 namespace gandiva {

@@ -411,15 +412,22 @@ TEST_F(TestDecimal, TestCastFunctions) {
   constexpr int32_t scale = 2;
   auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
   auto decimal_type_scale_1 = std::make_shared<arrow::Decimal128Type>(precision, 1);
-  auto field_int64 = field("intt64", arrow::int64());
+  auto field_int32 = field("int32", arrow::int32());
+  auto field_int64 = field("int64", arrow::int64());
+  auto field_float32 = field("float32", arrow::float32());
   auto field_float64 = field("float64", arrow::float64());
   auto field_dec = field("dec", decimal_type);
-  auto schema = arrow::schema({field_int64, field_float64, field_dec});
+  auto schema =
+      arrow::schema({field_int32, field_int64, field_float32, field_float64, field_dec});

   // build expressions
   auto exprs = std::vector<ExpressionPtr>{
+      TreeExprBuilder::MakeExpression("castDECIMAL", {field_int32},
+                                      field("int32_to_dec", decimal_type)),
       TreeExprBuilder::MakeExpression("castDECIMAL", {field_int64},
                                       field("int64_to_dec", decimal_type)),
+      TreeExprBuilder::MakeExpression("castDECIMAL", {field_float32},
+                                      field("float32_to_dec", decimal_type)),
       TreeExprBuilder::MakeExpression("castDECIMAL", {field_float64},
                                       field("float64_to_dec", decimal_type)),
       TreeExprBuilder::MakeExpression("castDECIMAL", {field_dec},
@@ -439,15 +447,18 @@
   int num_records = 4;
   auto validity = {true, true, true, true};
+  auto array_int32 = MakeArrowArrayInt32({123, 158, -123, -158});
   auto array_int64 = MakeArrowArrayInt64({123, 158, -123, -158});
+  auto array_float32 = MakeArrowArrayFloat32({1.23f, 1.58f, -1.23f, -1.58f});
   auto array_float64 = MakeArrowArrayFloat64({1.23, 1.58, -1.23, -1.58});
   auto array_dec = MakeArrowArrayDecimal(
       decimal_type, MakeDecimalVector({"1.23", "1.58", "-1.23", "-1.58"}, scale),
       validity);

   // prepare input record batch
-  auto in_batch = arrow::RecordBatch::Make(schema, num_records,
-                                           {array_int64, array_float64, array_dec});
+  auto in_batch = arrow::RecordBatch::Make(
+      schema, num_records,
+      {array_int32, array_int64, array_float32, array_float64, array_dec});

   // Evaluate expression
   arrow::ArrayVector outputs;
@@ -456,28 +467,34 @@

   // Validate results

-  // castDECIMAL(int64)
+  // castDECIMAL(int32)
   EXPECT_ARROW_ARRAY_EQUALS(
       MakeArrowArrayDecimal(decimal_type,
                             MakeDecimalVector({"123", "158", "-123", "-158"}, scale),
                             validity),
       outputs[0]);

+  // castDECIMAL(int64)
+  EXPECT_ARROW_ARRAY_EQUALS(
+      MakeArrowArrayDecimal(decimal_type,
+                            MakeDecimalVector({"123", "158", "-123", "-158"}, scale),
+                            validity),
+      outputs[1]);
+
+  // castDECIMAL(float32)
+  EXPECT_ARROW_ARRAY_EQUALS(array_dec, outputs[2]);
+
   // castDECIMAL(float64)
-  EXPECT_ARROW_ARRAY_EQUALS(array_dec, outputs[1]);
+  EXPECT_ARROW_ARRAY_EQUALS(array_dec, outputs[3]);

   // castDECIMAL(decimal)
   EXPECT_ARROW_ARRAY_EQUALS(
       MakeArrowArrayDecimal(arrow::decimal(precision, 1),
                             MakeDecimalVector({"1.2", "1.6", "-1.2", "-1.6"}, 1),
                             validity),
-      outputs[2]);
+      outputs[4]);

   // castBIGINT(decimal)
-  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayInt64({1, 1, -1, -1}), outputs[3]);
+  EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayInt64({1, 2, -1, -2}), outputs[5]);

   // castDOUBLE(decimal)
-  EXPECT_ARROW_ARRAY_EQUALS(array_float64, outputs[4]);
+  EXPECT_ARROW_ARRAY_EQUALS(array_float64, outputs[6]);
 }

 // isnull, isnumeric
@@
-787,4 +804,266 @@ TEST_F(TestDecimal, TestHash64WithSeed) { // hash with, without seed are not equal EXPECT_NE(int64_arr_WS->Value(4), int64_arr->Value(4)); } + +TEST_F(TestDecimal, TestNullDecimalConstant) { + // schema for input fields + constexpr int32_t precision = 36; + constexpr int32_t scale = 18; + auto decimal_type = std::make_shared(precision, scale); + auto field_b = field("b", decimal_type); + auto field_c = field("c", arrow::boolean()); + auto schema = arrow::schema({field_b, field_c}); + + // output fields + auto field_result = field("res", decimal_type); + + // build expression. + // if (c) + // null + // else + // b + auto node_a = TreeExprBuilder::MakeNull(decimal_type); + auto node_b = TreeExprBuilder::MakeField(field_b); + auto node_c = TreeExprBuilder::MakeField(field_c); + auto if_node = TreeExprBuilder::MakeIf(node_c, node_a, node_b, decimal_type); + + auto expr = TreeExprBuilder::MakeExpression(if_node, field_result); + + // Build a projector for the expressions. + std::shared_ptr projector; + Status status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + DCHECK_OK(status); + + // Create a row-batch with some sample data + int num_records = 4; + + auto array_b = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"2", "3", "4", "5"}, scale), + {true, true, true, true}); + + auto array_c = MakeArrowArrayBool({true, false, true, false}, {true, true, true, true}); + + // expected output + auto exp = + MakeArrowArrayDecimal(decimal_type, MakeDecimalVector({"0", "3", "3", "5"}, scale), + {false, true, false, true}); + + // prepare input record batch + auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_b, array_c}); + + // Evaluate expression + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + DCHECK_OK(status); + + // Validate results + EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0)); +} + +TEST_F(TestDecimal, TestCastVarCharDecimal) { + // schema for input fields + constexpr int32_t precision = 38; + constexpr int32_t scale = 2; + auto decimal_type = std::make_shared(precision, scale); + + auto field_dec = field("dec", decimal_type); + auto field_res_str = field("res_str", utf8()); + auto field_res_str_1 = field("res_str_1", utf8()); + auto schema = arrow::schema({field_dec, field_res_str, field_res_str_1}); + + // output fields + auto res_str = field("res_str", utf8()); + auto equals_res_bool = field("equals_res", boolean()); + + // build expressions. + auto node_dec = TreeExprBuilder::MakeField(field_dec); + auto node_res_str = TreeExprBuilder::MakeField(field_res_str); + auto node_res_str_1 = TreeExprBuilder::MakeField(field_res_str_1); + // limits decimal string to input length + auto str_len_limit = TreeExprBuilder::MakeLiteral(static_cast(5)); + auto str_len_limit_1 = TreeExprBuilder::MakeLiteral(static_cast(1)); + auto cast_varchar = + TreeExprBuilder::MakeFunction("castVARCHAR", {node_dec, str_len_limit}, utf8()); + auto cast_varchar_1 = + TreeExprBuilder::MakeFunction("castVARCHAR", {node_dec, str_len_limit_1}, utf8()); + auto equals = + TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_res_str}, boolean()); + auto equals_1 = + TreeExprBuilder::MakeFunction("equal", {cast_varchar_1, node_res_str_1}, boolean()); + auto expr = TreeExprBuilder::MakeExpression(equals, equals_res_bool); + auto expr_1 = TreeExprBuilder::MakeExpression(equals_1, equals_res_bool); + + // Build a projector for the expressions. 
+  std::shared_ptr<Projector> projector;
+
+  auto status = Projector::Make(schema, {expr, expr_1}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto array_dec = MakeArrowArrayDecimal(
+      decimal_type,
+      MakeDecimalVector({"10.51", "1.23", "100.23", "-1000.23", "-0000.10"}, scale),
+      {true, false, true, true, true});
+  auto array_str_res = MakeArrowArrayUtf8({"10.51", "-null-", "100.2", "-1000", "-0.10"},
+                                          {true, false, true, true, true});
+  auto array_str_res_1 =
+      MakeArrowArrayUtf8({"1", "-null-", "1", "-", "-"}, {true, false, true, true, true});
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records,
+                                           {array_dec, array_str_res, array_str_res_1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  auto exp = MakeArrowArrayBool({true, false, true, true, true},
+                                {true, false, true, true, true});
+  auto exp_1 = MakeArrowArrayBool({true, false, true, true, true},
+                                  {true, false, true, true, true});
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
+  EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs[1]);
+}
+
+TEST_F(TestDecimal, TestCastDecimalVarChar) {
+  // schema for input fields
+  constexpr int32_t precision = 4;
+  constexpr int32_t scale = 2;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+
+  auto field_str = field("in_str", utf8());
+  auto schema = arrow::schema({field_str});
+
+  // output fields
+  auto res_dec = field("res_dec", decimal_type);
+
+  // build expressions.
+  auto node_str = TreeExprBuilder::MakeField(field_str);
+  auto cast_decimal =
+      TreeExprBuilder::MakeFunction("castDECIMAL", {node_str}, decimal_type);
+  auto expr = TreeExprBuilder::MakeExpression(cast_decimal, res_dec);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+
+  auto array_str = MakeArrowArrayUtf8({"10.5134", "-0.0", "-0.1", "10.516", "-1000"},
+                                      {true, false, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_str});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  auto array_dec = MakeArrowArrayDecimal(
+      decimal_type, MakeDecimalVector({"10.51", "1.23", "-0.10", "10.52", "0.00"}, scale),
+      {true, false, true, true, true});
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(array_dec, outputs[0]);
+}
+
+TEST_F(TestDecimal, TestCastDecimalVarCharInvalidInput) {
+  // schema for input fields
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 0;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+
+  auto field_str = field("in_str", utf8());
+  auto schema = arrow::schema({field_str});
+
+  // output fields
+  auto res_dec = field("res_dec", decimal_type);
+
+  // build expressions.
+  auto node_str = TreeExprBuilder::MakeField(field_str);
+  auto cast_decimal =
+      TreeExprBuilder::MakeFunction("castDECIMAL", {node_str}, decimal_type);
+  auto expr = TreeExprBuilder::MakeExpression(cast_decimal, res_dec);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+
+  // invalid input
+  auto invalid_in = MakeArrowArrayUtf8({"a10.5134", "-0.0", "-0.1", "10.516", "-1000"},
+                                       {true, false, true, true, true});
+
+  // prepare input record batch
+  auto in_batch_1 = arrow::RecordBatch::Make(schema, num_records, {invalid_in});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs_1;
+  status = projector->Evaluate(*in_batch_1, pool_, &outputs_1);
+  EXPECT_FALSE(status.ok()) << status.message();
+  EXPECT_TRUE(status.message().find("not a valid decimal number") != std::string::npos);
+}
+
+TEST_F(TestDecimal, TestVarCharDecimalNestedCast) {
+  // schema for input fields
+  constexpr int32_t precision = 38;
+  constexpr int32_t scale = 2;
+  auto decimal_type = std::make_shared<arrow::Decimal128Type>(precision, scale);
+
+  auto field_dec = field("dec", decimal_type);
+  auto schema = arrow::schema({field_dec});
+
+  // output fields
+  auto field_dec_res = field("dec_res", decimal_type);
+
+  // build expressions.
+  auto node_dec = TreeExprBuilder::MakeField(field_dec);
+
+  // limits decimal string to input length
+  auto str_len_limit = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(5));
+  auto cast_varchar =
+      TreeExprBuilder::MakeFunction("castVARCHAR", {node_dec, str_len_limit}, utf8());
+  auto cast_decimal =
+      TreeExprBuilder::MakeFunction("castDECIMAL", {cast_varchar}, decimal_type);
+
+  auto expr = TreeExprBuilder::MakeExpression(cast_decimal, field_dec_res);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto array_dec = MakeArrowArrayDecimal(
+      decimal_type,
+      MakeDecimalVector({"10.51", "1.23", "100.23", "-1000.23", "-0000.10"}, scale),
+      {true, false, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_dec});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  auto array_dec_res = MakeArrowArrayDecimal(
+      decimal_type,
+      MakeDecimalVector({"10.51", "1.23", "100.20", "-1000.00", "-0.10"}, scale),
+      {true, false, true, true, true});
+  EXPECT_ARROW_ARRAY_EQUALS(array_dec_res, outputs[0]);
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index ea9a76ce805..103992d23fe 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -506,19 +506,37 @@ TEST_F(TestUtf8, TestIsNull) {

 TEST_F(TestUtf8, TestVarlenOutput) {
   // schema for input fields
-  auto field_a = field("a", utf8());
+  auto field_a = field("a", boolean());
   auto schema = arrow::schema({field_a});

   // build expressions.
-  auto expr = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeField(field_a),
-                                              field("res", utf8()));
+  // if (a) literal_hi else literal_bye
+  auto if_node = TreeExprBuilder::MakeIf(
+      TreeExprBuilder::MakeField(field_a), TreeExprBuilder::MakeStringLiteral("hi"),
+      TreeExprBuilder::MakeStringLiteral("bye"), utf8());
+  auto expr = TreeExprBuilder::MakeExpression(if_node, field("res", utf8()));

   // Build a projector for the expressions.
   std::shared_ptr<Projector> projector;

-  // assert that it fails gracefully.
-  ASSERT_RAISES(NotImplemented,
-                Projector::Make(schema, {expr}, TestConfiguration(), &projector));
+  ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector));
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto array_in =
+      MakeArrowArrayBool({true, false, false, false}, {true, true, true, false});
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_in});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));
+
+  // expected output
+  auto exp = MakeArrowArrayUtf8({"hi", "bye", "bye", "bye"}, {true, true, true, true});
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }

 TEST_F(TestUtf8, TestCastVarChar) {
diff --git a/cpp/src/gandiva/tree_expr_builder.cc b/cpp/src/gandiva/tree_expr_builder.cc
index a63b700c2ee..51c640cb0d7 100644
--- a/cpp/src/gandiva/tree_expr_builder.cc
+++ b/cpp/src/gandiva/tree_expr_builder.cc
@@ -99,7 +99,9 @@ NodePtr TreeExprBuilder::MakeNull(DataTypePtr data_type) {
     case arrow::Type::TIMESTAMP:
       return std::make_shared<LiteralNode>(data_type, LiteralHolder((int64_t)0), true);
     case arrow::Type::DECIMAL: {
-      DecimalScalar128 literal(0, 0);
+      std::shared_ptr<arrow::DecimalType> decimal_type =
+          arrow::internal::checked_pointer_cast<arrow::DecimalType>(data_type);
+      DecimalScalar128 literal(decimal_type->precision(), decimal_type->scale());
       return std::make_shared<LiteralNode>(data_type, LiteralHolder(literal), true);
     }
     default:
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index cb8de1657d6..8a077985061 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -157,7 +157,6 @@ add_custom_command(OUTPUT ${THRIFT_OUTPUT_FILES}

 set(PARQUET_SRCS
     arrow/reader.cc
-    arrow/record_reader.cc
     arrow/schema.cc
     arrow/writer.cc
     bloom_filter.cc
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 2cc8b0d05f4..c5d638d9788 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -50,7 +50,6 @@
 using arrow::Array;
 using arrow::ArrayVisitor;
 using arrow::Buffer;
 using arrow::ChunkedArray;
-using arrow::Column;
 using arrow::DataType;
 using arrow::default_memory_pool;
 using arrow::ListArray;
@@ -74,8 +73,6 @@
 using parquet::schema::GroupNode;
 using parquet::schema::NodePtr;
 using parquet::schema::PrimitiveNode;

-using ColumnVector = std::vector<std::shared_ptr<arrow::Column>>;
-
 namespace parquet {
 namespace arrow {

@@ -581,7 +578,7 @@ class TestParquetIO : public ::testing::Test {
     ASSERT_EQ(1, out->num_columns());
     ASSERT_EQ(values->length(), out->num_rows());

-    std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+    std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
     ASSERT_EQ(1, chunked_array->num_chunks());

     auto result = chunked_array->chunk(0);
@@ -661,7 +658,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredWrite) {
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(100, out->num_rows());

-  std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+  std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1, chunked_array->num_chunks());

   AssertArraysEqual(*values, *chunked_array->chunk(0));
@@ -841,7 +838,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWriteArrowIO) {
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(values->length(), out->num_rows());

-  std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+  std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1,
 
   AssertArraysEqual(*values, *chunked_array->chunk(0));
@@ -939,9 +936,7 @@ TYPED_TEST(TestParquetIO, CheckIterativeColumnRead) {
   }
 
   auto chunked = std::make_shared<::arrow::ChunkedArray>(batches);
-  auto chunked_col =
-      std::make_shared<::arrow::Column>(table->schema()->field(0), chunked);
-  auto chunked_table = ::arrow::Table::Make(table->schema(), {chunked_col});
+  auto chunked_table = ::arrow::Table::Make(table->schema(), {chunked});
 
   ASSERT_TRUE(table->Equals(*chunked_table));
 }
@@ -1099,7 +1094,7 @@ TEST_F(TestStringParquetIO, EmptyStringColumnRequiredWrite) {
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(100, out->num_rows());
 
-  std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+  std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1, chunked_array->num_chunks());
 
   AssertArraysEqual(*values, *chunked_array->chunk(0));
@@ -1124,7 +1119,7 @@ TEST_F(TestNullParquetIO, NullColumn) {
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(num_rows, out->num_rows());
 
-  std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+  std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1, chunked_array->num_chunks());
   AssertArraysEqual(*values, *chunked_array->chunk(0));
 }
@@ -1154,7 +1149,7 @@ TEST_F(TestNullParquetIO, NullListColumn) {
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(offsets.size() - 1, out->num_rows());
 
-  std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+  std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1, chunked_array->num_chunks());
   AssertArraysEqual(*list_array, *chunked_array->chunk(0));
 }
@@ -1181,7 +1176,7 @@ TEST_F(TestNullParquetIO, NullDictionaryColumn) {
   ASSERT_EQ(1, out->num_columns());
   ASSERT_EQ(100, out->num_rows());
 
-  std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+  std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1, chunked_array->num_chunks());
 
   std::shared_ptr<Array> expected_values =
@@ -1243,7 +1238,7 @@ class TestPrimitiveParquetIO : public TestParquetIO<TestType> {
     ASSERT_EQ(1, out->num_columns());
     ASSERT_EQ(SMALL_SIZE, out->num_rows());
 
-    std::shared_ptr<ChunkedArray> chunked_array = out->column(0)->data();
+    std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
     ASSERT_EQ(1, chunked_array->num_chunks());
     ExpectArrayT<TestType>(values.data(), chunked_array->chunk(0).get());
   }
@@ -1325,16 +1320,7 @@ void MakeDateTimeTypesTable(std::shared_ptr<Table>
* out, bool expected = false) ArrayFromVector<::arrow::Time64Type, int64_t>(f5->type(), is_valid, t64_us_values, &a5); ArrayFromVector<::arrow::Time64Type, int64_t>(f6->type(), is_valid, t64_ns_values, &a6); - std::vector> columns = { - std::make_shared("f0", a0), - std::make_shared("f1", a1), - std::make_shared("f2", a2), - std::make_shared("f3", (expected ? a3_x : a3)), - std::make_shared("f4", a4), - std::make_shared("f5", a5), - std::make_shared("f6", a6)}; - - *out = Table::Make(schema, columns); + *out = Table::Make(schema, {a0, a1, a2, expected ? a3_x : a3, a4, a5, a6}); } TEST(TestArrowReadWrite, DateTimeTypes) { @@ -1380,19 +1366,13 @@ TEST(TestArrowReadWrite, UseDeprecatedInt96) { // Each input is typed with a unique TimeUnit auto input_schema = schema( {field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us), field("f_ns", t_ns)}); - auto input = Table::Make( - input_schema, - {std::make_shared("f_s", a_s), std::make_shared("f_ms", a_ms), - std::make_shared("f_us", a_us), std::make_shared("f_ns", a_ns)}); + auto input = Table::Make(input_schema, {a_s, a_ms, a_us, a_ns}); // When reading parquet files, all int96 schema fields are converted to // timestamp nanoseconds auto ex_schema = schema({field("f_s", t_ns), field("f_ms", t_ns), field("f_us", t_ns), field("f_ns", t_ns)}); - auto ex_result = Table::Make( - ex_schema, - {std::make_shared("f_s", a_ns), std::make_shared("f_ms", a_ns), - std::make_shared("f_us", a_ns), std::make_shared("f_ns", a_ns)}); + auto ex_result = Table::Make(ex_schema, {a_ns, a_ns, a_ns, a_ns}); std::shared_ptr
result; ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( @@ -1446,18 +1426,12 @@ TEST(TestArrowReadWrite, CoerceTimestamps) { // Input table, all data as is auto s1 = ::arrow::schema( {field("f_s", t_s), field("f_ms", t_ms), field("f_us", t_us), field("f_ns", t_ns)}); - auto input = Table::Make( - s1, - {std::make_shared("f_s", a_s), std::make_shared("f_ms", a_ms), - std::make_shared("f_us", a_us), std::make_shared("f_ns", a_ns)}); + auto input = Table::Make(s1, {a_s, a_ms, a_us, a_ns}); // Result when coercing to milliseconds auto s2 = ::arrow::schema({field("f_s", t_ms), field("f_ms", t_ms), field("f_us", t_ms), field("f_ns", t_ms)}); - auto ex_milli_result = Table::Make( - s2, - {std::make_shared("f_s", a_ms), std::make_shared("f_ms", a_ms), - std::make_shared("f_us", a_ms), std::make_shared("f_ns", a_ms)}); + auto ex_milli_result = Table::Make(s2, {a_ms, a_ms, a_ms, a_ms}); std::shared_ptr
milli_result; ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( input, false /* use_threads */, input->num_rows(), {}, &milli_result, @@ -1469,10 +1443,7 @@ TEST(TestArrowReadWrite, CoerceTimestamps) { // Result when coercing to microseconds auto s3 = ::arrow::schema({field("f_s", t_us), field("f_ms", t_us), field("f_us", t_us), field("f_ns", t_us)}); - auto ex_micro_result = Table::Make( - s3, - {std::make_shared("f_s", a_us), std::make_shared("f_ms", a_us), - std::make_shared("f_us", a_us), std::make_shared("f_ns", a_us)}); + auto ex_micro_result = Table::Make(s3, {a_us, a_us, a_us, a_us}); std::shared_ptr
micro_result; ASSERT_NO_FATAL_FAILURE(DoSimpleRoundtrip( input, false /* use_threads */, input->num_rows(), {}, µ_result, @@ -1514,15 +1485,10 @@ TEST(TestArrowReadWrite, CoerceTimestampsLosePrecision) { auto s3 = ::arrow::schema({field("f_us", t_us)}); auto s4 = ::arrow::schema({field("f_ns", t_ns)}); - auto c1 = std::make_shared("f_s", a_s); - auto c2 = std::make_shared("f_ms", a_ms); - auto c3 = std::make_shared("f_us", a_us); - auto c4 = std::make_shared("f_ns", a_ns); - - auto t1 = Table::Make(s1, {c1}); - auto t2 = Table::Make(s2, {c2}); - auto t3 = Table::Make(s3, {c3}); - auto t4 = Table::Make(s4, {c4}); + auto t1 = Table::Make(s1, {a_s}); + auto t2 = Table::Make(s2, {a_ms}); + auto t3 = Table::Make(s3, {a_us}); + auto t4 = Table::Make(s4, {a_ns}); auto sink = CreateOutputStream(); @@ -1594,12 +1560,9 @@ TEST(TestArrowReadWrite, ImplicitSecondToMillisecondTimestampCoercion) { auto si = schema({field("timestamp", t_s)}); auto sx = schema({field("timestamp", t_ms)}); - auto ci = std::make_shared("timestamp", a_s); - auto cx = std::make_shared("timestamp", a_ms); - - auto ti = Table::Make(si, {ci}); // input - auto tx = Table::Make(sx, {cx}); // expected output - std::shared_ptr
to; // actual output + auto ti = Table::Make(si, {a_s}); // input + auto tx = Table::Make(sx, {a_ms}); // expected output + std::shared_ptr
to; // actual output // default properties (without explicit coercion instructions) used ... ASSERT_NO_FATAL_FAILURE( @@ -1635,14 +1598,9 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { ArrayFromVector<::arrow::TimestampType, int64_t>(t_us, d_us, &a_us); ArrayFromVector<::arrow::TimestampType, int64_t>(t_ns, d_ns, &a_ns); - auto c_s = std::make_shared("ts:s", a_s); - auto c_ms = std::make_shared("ts:ms", a_ms); - auto c_us = std::make_shared("ts:us", a_us); - auto c_ns = std::make_shared("ts:ns", a_ns); - auto input_schema = schema({field("ts:s", t_s), field("ts:ms", t_ms), field("ts:us", t_us), field("ts:ns", t_ns)}); - auto input_table = Table::Make(input_schema, {c_s, c_ms, c_us, c_ns}); + auto input_table = Table::Make(input_schema, {a_s, a_ms, a_us, a_ns}); auto parquet_version_1_properties = ::parquet::default_writer_properties(); auto parquet_version_2_properties = ::parquet::WriterProperties::Builder() @@ -1654,7 +1612,7 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { // and nanoseconds should be coerced to microseconds auto expected_schema = schema({field("ts:s", t_ms), field("ts:ms", t_ms), field("ts:us", t_us), field("ts:ns", t_us)}); - auto expected_table = Table::Make(expected_schema, {c_ms, c_ms, c_us, c_us}); + auto expected_table = Table::Make(expected_schema, {a_ms, a_ms, a_us, a_us}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_1_properties)); } @@ -1663,7 +1621,7 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { // and nanoseconds should be retained auto expected_schema = schema({field("ts:s", t_ms), field("ts:ms", t_ms), field("ts:us", t_us), field("ts:ns", t_ns)}); - auto expected_table = Table::Make(expected_schema, {c_ms, c_ms, c_us, c_ns}); + auto expected_table = Table::Make(expected_schema, {a_ms, a_ms, a_us, a_ns}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_2_properties)); } @@ -1693,14 +1651,14 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { // Using Parquet version 1.0, coercing to milliseconds or microseconds is allowed auto expected_schema = schema({field("ts:s", t_ms), field("ts:ms", t_ms), field("ts:us", t_ms), field("ts:ns", t_ms)}); - auto expected_table = Table::Make(expected_schema, {c_ms, c_ms, c_ms, c_ms}); + auto expected_table = Table::Make(expected_schema, {a_ms, a_ms, a_ms, a_ms}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_1_properties, arrow_coerce_to_millis_properties)); expected_schema = schema({field("ts:s", t_us), field("ts:ms", t_us), field("ts:us", t_us), field("ts:ns", t_us)}); - expected_table = Table::Make(expected_schema, {c_us, c_us, c_us, c_us}); + expected_table = Table::Make(expected_schema, {a_us, a_us, a_us, a_us}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_1_properties, arrow_coerce_to_micros_properties)); @@ -1709,14 +1667,14 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { // Using Parquet version 2.0, coercing to milliseconds or microseconds is allowed auto expected_schema = schema({field("ts:s", t_ms), field("ts:ms", t_ms), field("ts:us", t_ms), field("ts:ns", t_ms)}); - auto expected_table = Table::Make(expected_schema, {c_ms, c_ms, c_ms, c_ms}); + auto expected_table = Table::Make(expected_schema, {a_ms, a_ms, a_ms, a_ms}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_2_properties, 
arrow_coerce_to_millis_properties)); expected_schema = schema({field("ts:s", t_us), field("ts:ms", t_us), field("ts:us", t_us), field("ts:ns", t_us)}); - expected_table = Table::Make(expected_schema, {c_us, c_us, c_us, c_us}); + expected_table = Table::Make(expected_schema, {a_us, a_us, a_us, a_us}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_2_properties, arrow_coerce_to_micros_properties)); @@ -1734,7 +1692,7 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { // Using Parquet version 2.0, coercing to (int64) nanoseconds is allowed auto expected_schema = schema({field("ts:s", t_ns), field("ts:ms", t_ns), field("ts:us", t_ns), field("ts:ns", t_ns)}); - auto expected_table = Table::Make(expected_schema, {c_ns, c_ns, c_ns, c_ns}); + auto expected_table = Table::Make(expected_schema, {a_ns, a_ns, a_ns, a_ns}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_2_properties, arrow_coerce_to_nanos_properties)); @@ -1747,7 +1705,7 @@ TEST(TestArrowReadWrite, ParquetVersionTimestampDifferences) { // storage is used auto expected_schema = schema({field("ts:s", t_ns), field("ts:ms", t_ns), field("ts:us", t_ns), field("ts:ns", t_ns)}); - auto expected_table = Table::Make(expected_schema, {c_ns, c_ns, c_ns, c_ns}); + auto expected_table = Table::Make(expected_schema, {a_ns, a_ns, a_ns, a_ns}); ASSERT_NO_FATAL_FAILURE(CheckConfiguredRoundtrip(input_table, expected_table, parquet_version_1_properties, arrow_enable_int96_properties)); @@ -1781,11 +1739,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { ArrayFromVector<::arrow::Time32Type, int32_t>(f1->type(), is_valid, a1_values, &a1); ArrayFromVector<::arrow::Time32Type, int32_t>(f1->type(), a1_values, &a1_nonnull); - std::vector> columns = { - std::make_shared("f0", a0), std::make_shared("f1", a1), - std::make_shared("f2", a0_nonnull), - std::make_shared("f3", a1_nonnull)}; - auto table = Table::Make(schema, columns); + auto table = Table::Make(schema, {a0, a1, a0_nonnull, a1_nonnull}); // Expected schema and values auto e0 = field("f0", ::arrow::date32()); @@ -1802,11 +1756,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { ArrayFromVector<::arrow::Time32Type, int32_t>(e1->type(), is_valid, x1_values, &x1); ArrayFromVector<::arrow::Time32Type, int32_t>(e1->type(), x1_values, &x1_nonnull); - std::vector> ex_columns = { - std::make_shared("f0", x0), std::make_shared("f1", x1), - std::make_shared("f2", x0_nonnull), - std::make_shared("f3", x1_nonnull)}; - auto ex_table = Table::Make(ex_schema, ex_columns); + auto ex_table = Table::Make(ex_schema, {x0, x1, x0_nonnull, x1_nonnull}); std::shared_ptr
result; ASSERT_NO_FATAL_FAILURE( @@ -1819,8 +1769,7 @@ TEST(TestArrowReadWrite, ConvertedDateTimeTypes) { void MakeDoubleTable(int num_columns, int num_rows, int nchunks, std::shared_ptr
* out) { - std::shared_ptr<::arrow::Column> column; - std::vector> columns(num_columns); + std::vector> columns(num_columns); std::vector> fields(num_columns); for (int i = 0; i < num_columns; ++i) { @@ -1834,10 +1783,8 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks, for (int j = 0; j < nchunks; ++j) { arrays.push_back(values); } - column = MakeColumn(ss.str(), arrays, true); - - columns[i] = column; - fields[i] = column->field(); + columns[i] = std::make_shared(arrays); + fields[i] = ::arrow::field(ss.str(), values->type()); } auto schema = std::make_shared<::arrow::Schema>(fields); *out = Table::Make(schema, columns); @@ -2000,11 +1947,11 @@ TEST(TestArrowReadWrite, ReadColumnSubset) { ASSERT_NO_FATAL_FAILURE( DoSimpleRoundtrip(table, use_threads, table->num_rows(), column_subset, &result)); - std::vector> ex_columns; + std::vector> ex_columns; std::vector> ex_fields; for (int i : column_subset) { ex_columns.push_back(table->column(i)); - ex_fields.push_back(table->column(i)->field()); + ex_fields.push_back(table->field(i)); } auto ex_schema = ::arrow::schema(ex_fields); @@ -2057,11 +2004,7 @@ TEST(TestArrowReadWrite, ListLargeRecords) { pieces.push_back(chunked_piece->chunk(0)); } auto chunked = std::make_shared<::arrow::ChunkedArray>(pieces); - - auto chunked_col = - std::make_shared<::arrow::Column>(table->schema()->field(0), chunked); - std::vector> columns = {chunked_col}; - auto chunked_table = Table::Make(table->schema(), columns); + auto chunked_table = Table::Make(table->schema(), {chunked}); ASSERT_TRUE(table->Equals(*chunked_table)); } @@ -2146,8 +2089,7 @@ TEST(TestArrowReadWrite, TableWithChunkedColumns) { auto field = ::arrow::field("fname", type); auto schema = ::arrow::schema({field}); - auto col = std::make_shared<::arrow::Column>(field, arrays); - auto table = Table::Make(schema, {col}); + auto table = Table::Make(schema, {std::make_shared(arrays)}); ASSERT_NO_FATAL_FAILURE(CheckSimpleRoundtrip(table, 2)); ASSERT_NO_FATAL_FAILURE(CheckSimpleRoundtrip(table, 3)); @@ -2171,8 +2113,7 @@ TEST(TestArrowReadWrite, TableWithDuplicateColumns) { ArrayFromVector<::arrow::Int8Type, int8_t>(a0_values, &a0); ArrayFromVector<::arrow::Int16Type, int16_t>(a1_values, &a1); - auto table = Table::Make(schema, {std::make_shared(f0->name(), a0), - std::make_shared(f1->name(), a1)}); + auto table = Table::Make(schema, {a0, a1}); ASSERT_NO_FATAL_FAILURE(CheckSimpleRoundtrip(table, table->num_rows())); } @@ -2207,9 +2148,8 @@ TEST(TestArrowReadWrite, DictionaryColumnChunkedWrite) { std::make_shared<::arrow::DictionaryArray>(dict_type, f0_values, dict_values), std::make_shared<::arrow::DictionaryArray>(dict_type, f1_values, dict_values)}; - std::vector> columns; - auto column = MakeColumn("dictionary", dict_arrays, true); - columns.emplace_back(column); + std::vector> columns; + columns.emplace_back(std::make_shared(dict_arrays)); auto table = Table::Make(schema, columns); @@ -2230,7 +2170,7 @@ TEST(TestArrowReadWrite, DictionaryColumnChunkedWrite) { // The column name gets changed on output to the name of the // field, and it also turns into a nullable column - columns.emplace_back(MakeColumn("dictionary", expected_array, true)); + columns.emplace_back(std::make_shared(expected_array)); schema = ::arrow::schema({::arrow::field("dictionary", ::arrow::utf8())}); @@ -2320,11 +2260,9 @@ class TestNestedSchemaRead : public ::testing::TestWithParam { void ValidateTableArrayTypes(const Table& table) { for (int i = 0; i < table.num_columns(); i++) { const 
std::shared_ptr<::arrow::Field> schema_field = table.schema()->field(i); - const std::shared_ptr column = table.column(i); - // Compare with the column field - ASSERT_TRUE(schema_field->Equals(column->field())); + const std::shared_ptr column = table.column(i); // Compare with the array type - ASSERT_TRUE(schema_field->type()->Equals(column->data()->chunk(0)->type())); + ASSERT_TRUE(schema_field->type()->Equals(column->chunk(0)->type())); } } @@ -2519,13 +2457,13 @@ TEST_F(TestNestedSchemaRead, ReadIntoTableFull) { ASSERT_NO_FATAL_FAILURE(ValidateTableArrayTypes(*table)); auto struct_field_array = - std::static_pointer_cast<::arrow::StructArray>(table->column(0)->data()->chunk(0)); + std::static_pointer_cast<::arrow::StructArray>(table->column(0)->chunk(0)); auto leaf1_array = std::static_pointer_cast<::arrow::Int32Array>(struct_field_array->field(0)); auto leaf2_array = std::static_pointer_cast<::arrow::Int32Array>(struct_field_array->field(1)); auto leaf3_array = - std::static_pointer_cast<::arrow::Int32Array>(table->column(1)->data()->chunk(0)); + std::static_pointer_cast<::arrow::Int32Array>(table->column(1)->chunk(0)); // validate struct and leaf arrays @@ -2599,8 +2537,8 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) { const int num_trees = 3; const int depth = 3; #else - const int num_trees = 5; - const int depth = 5; + const int num_trees = 2; + const int depth = 2; #endif const int num_children = 3; int num_rows = SMALL_SIZE * (depth + 2); @@ -2613,7 +2551,7 @@ TEST_P(TestNestedSchemaRead, DeepNestedSchemaRead) { DeepParquetTestVisitor visitor(GetParam(), values_array_); for (int i = 0; i < table->num_columns(); i++) { - auto tree = table->column(i)->data()->chunk(0); + auto tree = table->column(i)->chunk(0); ASSERT_OK_NO_THROW(visitor.Validate(tree)); } } @@ -2670,7 +2608,8 @@ TEST(TestArrowReaderAdHoc, DISABLED_LargeStringColumn) { } std::shared_ptr array; ASSERT_OK(builder.Finish(&array)); - auto table = Table::Make({std::make_shared("x", array)}); + auto table = + Table::Make(::arrow::schema({::arrow::field("x", array->type())}), {array}); std::shared_ptr schm; ASSERT_OK_NO_THROW( ToParquetSchema(table->schema().get(), *default_writer_properties(), &schm)); @@ -2740,10 +2679,9 @@ TEST_P(TestArrowReaderAdHocSparkAndHvr, ReadDecimals) { auto value_column = table->column(0); ASSERT_EQ(expected_length, value_column->length()); - auto raw_array = value_column->data(); - ASSERT_EQ(1, raw_array->num_chunks()); + ASSERT_EQ(1, value_column->num_chunks()); - auto chunk = raw_array->chunk(0); + auto chunk = value_column->chunk(0); std::shared_ptr expected_array; diff --git a/cpp/src/parquet/arrow/arrow-schema-test.cc b/cpp/src/parquet/arrow/arrow-schema-test.cc index 9a43c7de2b7..dc0a02a7b87 100644 --- a/cpp/src/parquet/arrow/arrow-schema-test.cc +++ b/cpp/src/parquet/arrow/arrow-schema-test.cc @@ -116,14 +116,14 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) { parquet_fields.push_back(PrimitiveNode::Make("timestamp", Repetition::REQUIRED, ParquetType::INT64, ConvertedType::TIMESTAMP_MILLIS)); - arrow_fields.push_back(std::make_shared( - "timestamp", ::arrow::timestamp(TimeUnit::MILLI, "UTC"), false)); + arrow_fields.push_back( + std::make_shared("timestamp", ::arrow::timestamp(TimeUnit::MILLI), false)); parquet_fields.push_back(PrimitiveNode::Make("timestamp[us]", Repetition::REQUIRED, ParquetType::INT64, ConvertedType::TIMESTAMP_MICROS)); arrow_fields.push_back(std::make_shared( - "timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO, "UTC"), false)); + 
"timestamp[us]", ::arrow::timestamp(TimeUnit::MICRO), false)); parquet_fields.push_back(PrimitiveNode::Make("date", Repetition::REQUIRED, ParquetType::INT32, ConvertedType::DATE)); @@ -855,14 +855,21 @@ TEST_F(TestConvertArrowSchema, ArrowFields) { {"time64(nanosecond)", ::arrow::time64(::arrow::TimeUnit::NANO), LogicalType::Time(true, LogicalType::TimeUnit::NANOS), ParquetType::INT64, -1}, {"timestamp(millisecond)", ::arrow::timestamp(::arrow::TimeUnit::MILLI), - LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), ParquetType::INT64, - -1}, + LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true), + ParquetType::INT64, -1}, {"timestamp(microsecond)", ::arrow::timestamp(::arrow::TimeUnit::MICRO), - LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), ParquetType::INT64, - -1}, + LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true), + ParquetType::INT64, -1}, + // Parquet v1, values converted to microseconds {"timestamp(nanosecond)", ::arrow::timestamp(::arrow::TimeUnit::NANO), - LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), ParquetType::INT64, - -1}, + LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true), + ParquetType::INT64, -1}, {"timestamp(millisecond, UTC)", ::arrow::timestamp(::arrow::TimeUnit::MILLI, "UTC"), LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), ParquetType::INT64, -1}, @@ -887,14 +894,15 @@ TEST_F(TestConvertArrowSchema, ArrowFields) { std::vector parquet_fields; for (const FieldConstructionArguments& c : cases) { - arrow_fields.push_back(std::make_shared(c.name, c.datatype, false)); + arrow_fields.push_back(::arrow::field(c.name, c.datatype, false)); parquet_fields.push_back(PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.logical_type, c.physical_type, c.physical_length)); } ASSERT_OK(ConvertSchema(arrow_fields)); - ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(parquet_fields)); + CheckFlatSchema(parquet_fields); + // ASSERT_NO_FATAL_FAILURE(); } TEST_F(TestConvertArrowSchema, ArrowNonconvertibleFields) { diff --git a/cpp/src/parquet/arrow/reader-writer-benchmark.cc b/cpp/src/parquet/arrow/reader-writer-benchmark.cc index d035e1ce8d6..239d707e231 100644 --- a/cpp/src/parquet/arrow/reader-writer-benchmark.cc +++ b/cpp/src/parquet/arrow/reader-writer-benchmark.cc @@ -113,8 +113,7 @@ std::shared_ptr<::arrow::Table> TableFromVector( auto field = ::arrow::field("column", type, nullable); auto schema = ::arrow::schema({field}); - auto column = std::make_shared<::arrow::Column>(field, array); - return ::arrow::Table::Make(schema, {column}); + return ::arrow::Table::Make(schema, {array}); } template <> @@ -136,8 +135,7 @@ std::shared_ptr<::arrow::Table> TableFromVector(const std::vector( std::vector>({field})); - auto column = std::make_shared<::arrow::Column>(field, array); - return ::arrow::Table::Make(schema, {column}); + return ::arrow::Table::Make(schema, {array}); } template diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 3fe37b0e239..45071b5003f 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -42,7 +42,6 @@ // For arrow::compute::Datum. This should perhaps be promoted. 
See ARROW-4022 #include "arrow/compute/kernel.h" -#include "parquet/arrow/record_reader.h" #include "parquet/arrow/schema.h" #include "parquet/column_reader.h" #include "parquet/exception.h" @@ -56,7 +55,6 @@ using arrow::Array; using arrow::BooleanArray; using arrow::ChunkedArray; -using arrow::Column; using arrow::Field; using arrow::Int32Array; using arrow::ListArray; @@ -83,6 +81,7 @@ namespace arrow { using ::arrow::BitUtil::FromBigEndian; using ::arrow::internal::SafeLeftShift; +using ::arrow::util::SafeLoadAs; template using ArrayType = typename ::arrow::TypeTraits::ArrayType; @@ -190,12 +189,10 @@ class RowGroupRecordBatchReader : public ::arrow::RecordBatchReader { // TODO (hatemhelal): Consider refactoring this to share logic with ReadTable as this // does not currently honor the use_threads option. - std::vector> columns(column_indices_.size()); + std::vector> columns(column_indices_.size()); for (size_t i = 0; i < column_indices_.size(); ++i) { - std::shared_ptr array; - RETURN_NOT_OK(column_readers_[i]->NextBatch(batch_size_, &array)); - columns[i] = std::make_shared(schema_->field(static_cast(i)), array); + RETURN_NOT_OK(column_readers_[i]->NextBatch(batch_size_, &columns[i])); } // Create an intermediate table and use TableBatchReader as an adaptor to a @@ -277,7 +274,7 @@ class FileReader::Impl { std::vector GetDictionaryIndices(const std::vector& indices); std::shared_ptr<::arrow::Schema> FixSchema( const ::arrow::Schema& old_schema, const std::vector& dict_indices, - std::vector>& columns); + const std::vector>& columns); int64_t batch_size() const { return reader_properties_.batch_size(); } @@ -547,15 +544,14 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index, return Status::Invalid("Invalid column index"); } int num_fields = static_cast(field_indices.size()); - std::vector> columns(num_fields); + std::vector> columns(num_fields); // TODO(wesm): Refactor to share more code with ReadTable - auto ReadColumnFunc = [&indices, &field_indices, &row_group_index, &schema, &columns, + auto ReadColumnFunc = [&indices, &field_indices, &row_group_index, &columns, this](int i) { - std::shared_ptr array; - RETURN_NOT_OK(ReadColumnChunk(field_indices[i], indices, row_group_index, &array)); - columns[i] = std::make_shared(schema->field(i), array); + RETURN_NOT_OK( + ReadColumnChunk(field_indices[i], indices, row_group_index, &columns[i])); return Status::OK(); }; @@ -605,13 +601,10 @@ Status FileReader::Impl::ReadTable(const std::vector& indices, } int num_fields = static_cast(field_indices.size()); - std::vector> columns(num_fields); + std::vector> columns(num_fields); - auto ReadColumnFunc = [&indices, &field_indices, &schema, &columns, this](int i) { - std::shared_ptr array; - RETURN_NOT_OK(ReadSchemaField(field_indices[i], indices, &array)); - columns[i] = std::make_shared(schema->field(i), array); - return Status::OK(); + auto ReadColumnFunc = [&indices, &field_indices, &columns, this](int i) { + return ReadSchemaField(field_indices[i], indices, &columns[i]); }; if (reader_properties_.use_threads()) { @@ -696,18 +689,13 @@ std::vector FileReader::Impl::GetDictionaryIndices(const std::vector& std::shared_ptr<::arrow::Schema> FileReader::Impl::FixSchema( const ::arrow::Schema& old_schema, const std::vector& dict_indices, - std::vector>& columns) { + const std::vector>& columns) { // Fix the schema with the actual DictionaryType that was read auto fields = old_schema.fields(); for (int idx : dict_indices) { - auto name = columns[idx]->name(); - auto dict_array = 
columns[idx]->data(); - auto dict_field = std::make_shared<::arrow::Field>(name, dict_array->type()); - fields[idx] = dict_field; - columns[idx] = std::make_shared(dict_field, dict_array); + fields[idx] = old_schema.field(idx)->WithType(columns[idx]->type()); } - return std::make_shared<::arrow::Schema>(fields, old_schema.metadata()); } @@ -1212,38 +1200,37 @@ static uint64_t BytesToInteger(const uint8_t* bytes, int32_t start, int32_t stop case 1: return bytes[start]; case 2: - return FromBigEndian(*reinterpret_cast(bytes + start)); + return FromBigEndian(SafeLoadAs(bytes + start)); case 3: { - const uint64_t first_two_bytes = - FromBigEndian(*reinterpret_cast(bytes + start)); + const uint64_t first_two_bytes = FromBigEndian(SafeLoadAs(bytes + start)); const uint64_t last_byte = bytes[stop - 1]; return first_two_bytes << 8 | last_byte; } case 4: - return FromBigEndian(*reinterpret_cast(bytes + start)); + return FromBigEndian(SafeLoadAs(bytes + start)); case 5: { const uint64_t first_four_bytes = - FromBigEndian(*reinterpret_cast(bytes + start)); + FromBigEndian(SafeLoadAs(bytes + start)); const uint64_t last_byte = bytes[stop - 1]; return first_four_bytes << 8 | last_byte; } case 6: { const uint64_t first_four_bytes = - FromBigEndian(*reinterpret_cast(bytes + start)); + FromBigEndian(SafeLoadAs(bytes + start)); const uint64_t last_two_bytes = - FromBigEndian(*reinterpret_cast(bytes + start + 4)); + FromBigEndian(SafeLoadAs(bytes + start + 4)); return first_four_bytes << 16 | last_two_bytes; } case 7: { const uint64_t first_four_bytes = - FromBigEndian(*reinterpret_cast(bytes + start)); + FromBigEndian(SafeLoadAs(bytes + start)); const uint64_t second_two_bytes = - FromBigEndian(*reinterpret_cast(bytes + start + 4)); + FromBigEndian(SafeLoadAs(bytes + start + 4)); const uint64_t last_byte = bytes[stop - 1]; return first_four_bytes << 24 | second_two_bytes << 8 | last_byte; } case 8: - return FromBigEndian(*reinterpret_cast(bytes + start)); + return FromBigEndian(SafeLoadAs(bytes + start)); default: { DCHECK(false); return UINT64_MAX; @@ -1740,8 +1727,9 @@ void StructImpl::InitField( for (size_t i = 0; i < children.size(); i++) { fields[i] = children[i]->field(); } + auto type = ::arrow::struct_(fields); - field_ = ::arrow::field(node->name(), type); + field_ = ::arrow::field(node->name(), type, node->is_optional()); } Status StructImpl::GetRepLevels(const int16_t** data, size_t* length) { diff --git a/cpp/src/parquet/arrow/record_reader.cc b/cpp/src/parquet/arrow/record_reader.cc deleted file mode 100644 index 71bedec35ab..00000000000 --- a/cpp/src/parquet/arrow/record_reader.cc +++ /dev/null @@ -1,975 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "parquet/arrow/record_reader.h" - -#include -#include -#include -#include -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/buffer.h" -#include "arrow/builder.h" -#include "arrow/type.h" -#include "arrow/util/logging.h" - -#include "parquet/column_page.h" -#include "parquet/column_reader.h" -#include "parquet/encoding.h" -#include "parquet/exception.h" -#include "parquet/schema.h" -#include "parquet/types.h" - -using arrow::MemoryPool; - -namespace parquet { -namespace internal { - -// PLAIN_DICTIONARY is deprecated but used to be used as a dictionary index -// encoding. -static bool IsDictionaryIndexEncoding(Encoding::type e) { - return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY; -} - -// The minimum number of repetition/definition levels to decode at a time, for -// better vectorized performance when doing many smaller record reads -constexpr int64_t kMinLevelBatchSize = 1024; - -class RecordReader::RecordReaderImpl { - public: - RecordReaderImpl(const ColumnDescriptor* descr, MemoryPool* pool) - : descr_(descr), - pool_(pool), - num_buffered_values_(0), - num_decoded_values_(0), - max_def_level_(descr->max_definition_level()), - max_rep_level_(descr->max_repetition_level()), - at_record_start_(true), - records_read_(0), - values_written_(0), - values_capacity_(0), - null_count_(0), - levels_written_(0), - levels_position_(0), - levels_capacity_(0), - uses_values_(!(descr->physical_type() == Type::BYTE_ARRAY)) { - nullable_values_ = internal::HasSpacedValues(descr); - if (uses_values_) { - values_ = AllocateBuffer(pool); - } - valid_bits_ = AllocateBuffer(pool); - def_levels_ = AllocateBuffer(pool); - rep_levels_ = AllocateBuffer(pool); - Reset(); - } - - virtual ~RecordReaderImpl() = default; - - virtual int64_t ReadRecordData(int64_t num_records) = 0; - - // Returns true if there are still values in this column. - bool HasNext() { - // Either there is no data page available yet, or the data page has been - // exhausted - if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) { - if (!ReadNewPage() || num_buffered_values_ == 0) { - return false; - } - } - return true; - } - - int64_t ReadRecords(int64_t num_records) { - // Delimit records, then read values at the end - int64_t records_read = 0; - - if (levels_position_ < levels_written_) { - records_read += ReadRecordData(num_records); - } - - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); - - // If we are in the middle of a record, we continue until reaching the - // desired number of records or the end of the current record if we've found - // enough records - while (!at_record_start_ || records_read < num_records) { - // Is there more data to read in this row group? - if (!HasNext()) { - if (!at_record_start_) { - // We ended the row group while inside a record that we haven't seen - // the end of yet. 
So increment the record count for the last record in - // the row group - ++records_read; - at_record_start_ = true; - } - break; - } - - /// We perform multiple batch reads until we either exhaust the row group - /// or observe the desired number of records - int64_t batch_size = std::min(level_batch_size, available_values_current_page()); - - // No more data in column - if (batch_size == 0) { - break; - } - - if (max_def_level_ > 0) { - ReserveLevels(batch_size); - - int16_t* def_levels = this->def_levels() + levels_written_; - int16_t* rep_levels = this->rep_levels() + levels_written_; - - // Not present for non-repeated fields - int64_t levels_read = 0; - if (max_rep_level_ > 0) { - levels_read = ReadDefinitionLevels(batch_size, def_levels); - if (ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); - } - } else if (max_def_level_ > 0) { - levels_read = ReadDefinitionLevels(batch_size, def_levels); - } - - // Exhausted column chunk - if (levels_read == 0) { - break; - } - - levels_written_ += levels_read; - records_read += ReadRecordData(num_records - records_read); - } else { - // No repetition or definition levels - batch_size = std::min(num_records - records_read, batch_size); - records_read += ReadRecordData(batch_size); - } - } - - return records_read; - } - - // Dictionary decoders must be reset when advancing row groups - virtual void ResetDecoders() = 0; - - void SetPageReader(std::unique_ptr reader) { - at_record_start_ = true; - pager_ = std::move(reader); - ResetDecoders(); - } - - bool HasMoreData() const { return pager_ != nullptr; } - - int16_t* def_levels() const { - return reinterpret_cast(def_levels_->mutable_data()); - } - - int16_t* rep_levels() { - return reinterpret_cast(rep_levels_->mutable_data()); - } - - uint8_t* values() const { return values_->mutable_data(); } - - /// \brief Number of values written including nulls (if any) - int64_t values_written() const { return values_written_; } - - int64_t levels_position() const { return levels_position_; } - int64_t levels_written() const { return levels_written_; } - - // We may outwardly have the appearance of having exhausted a column chunk - // when in fact we are in the middle of processing the last batch - bool has_values_to_process() const { return levels_position_ < levels_written_; } - - int64_t null_count() const { return null_count_; } - - bool nullable_values() const { return nullable_values_; } - - std::shared_ptr ReleaseValues() { - if (uses_values_) { - auto result = values_; - values_ = AllocateBuffer(pool_); - return result; - } else { - return nullptr; - } - } - - std::shared_ptr ReleaseIsValid() { - auto result = valid_bits_; - valid_bits_ = AllocateBuffer(pool_); - return result; - } - - // Process written repetition/definition levels to reach the end of - // records. Process no more levels than necessary to delimit the indicated - // number of logical records. 
Updates internal state of RecordReader - // - // \return Number of records delimited - int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) { - int64_t values_to_read = 0; - int64_t records_read = 0; - - const int16_t* def_levels = this->def_levels() + levels_position_; - const int16_t* rep_levels = this->rep_levels() + levels_position_; - - DCHECK_GT(max_rep_level_, 0); - - // Count logical records and number of values to read - while (levels_position_ < levels_written_) { - if (*rep_levels++ == 0) { - // If at_record_start_ is true, we are seeing the start of a record - // for the second time, such as after repeated calls to - // DelimitRecords. In this case we must continue until we find - // another record start or exhausting the ColumnChunk - if (!at_record_start_) { - // We've reached the end of a record; increment the record count. - ++records_read; - if (records_read == num_records) { - // We've found the number of records we were looking for. Set - // at_record_start_ to true and break - at_record_start_ = true; - break; - } - } - } - - // We have decided to consume the level at this position; therefore we - // must advance until we find another record boundary - at_record_start_ = false; - - if (*def_levels++ == max_def_level_) { - ++values_to_read; - } - ++levels_position_; - } - *values_seen = values_to_read; - return records_read; - } - - // Read multiple definition levels into preallocated memory - // - // Returns the number of decoded definition levels - int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { - if (descr_->max_definition_level() == 0) { - return 0; - } - return definition_level_decoder_.Decode(static_cast(batch_size), levels); - } - - int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { - if (descr_->max_repetition_level() == 0) { - return 0; - } - return repetition_level_decoder_.Decode(static_cast(batch_size), levels); - } - - int64_t available_values_current_page() const { - return num_buffered_values_ - num_decoded_values_; - } - - void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; } - - Type::type type() const { return descr_->physical_type(); } - - const ColumnDescriptor* descr() const { return descr_; } - - void Reserve(int64_t capacity) { - ReserveLevels(capacity); - ReserveValues(capacity); - } - - void ReserveLevels(int64_t capacity) { - if (descr_->max_definition_level() > 0 && - (levels_written_ + capacity > levels_capacity_)) { - int64_t new_levels_capacity = BitUtil::NextPower2(levels_capacity_ + 1); - while (levels_written_ + capacity > new_levels_capacity) { - new_levels_capacity = BitUtil::NextPower2(new_levels_capacity + 1); - } - PARQUET_THROW_NOT_OK( - def_levels_->Resize(new_levels_capacity * sizeof(int16_t), false)); - if (descr_->max_repetition_level() > 0) { - PARQUET_THROW_NOT_OK( - rep_levels_->Resize(new_levels_capacity * sizeof(int16_t), false)); - } - levels_capacity_ = new_levels_capacity; - } - } - - void ReserveValues(int64_t capacity) { - if (values_written_ + capacity > values_capacity_) { - int64_t new_values_capacity = BitUtil::NextPower2(values_capacity_ + 1); - while (values_written_ + capacity > new_values_capacity) { - new_values_capacity = BitUtil::NextPower2(new_values_capacity + 1); - } - - int type_size = GetTypeByteSize(descr_->physical_type()); - - // XXX(wesm): A hack to avoid memory allocation when reading directly - // into builder classes - if (uses_values_) { - PARQUET_THROW_NOT_OK(values_->Resize(new_values_capacity * type_size, 
false)); - } - - values_capacity_ = new_values_capacity; - } - if (nullable_values_) { - int64_t valid_bytes_new = BitUtil::BytesForBits(values_capacity_); - if (valid_bits_->size() < valid_bytes_new) { - int64_t valid_bytes_old = BitUtil::BytesForBits(values_written_); - PARQUET_THROW_NOT_OK(valid_bits_->Resize(valid_bytes_new, false)); - - // Avoid valgrind warnings - memset(valid_bits_->mutable_data() + valid_bytes_old, 0, - valid_bytes_new - valid_bytes_old); - } - } - } - - void Reset() { - ResetValues(); - - if (levels_written_ > 0) { - const int64_t levels_remaining = levels_written_ - levels_position_; - // Shift remaining levels to beginning of buffer and trim to only the number - // of decoded levels remaining - int16_t* def_data = def_levels(); - int16_t* rep_data = rep_levels(); - - std::copy(def_data + levels_position_, def_data + levels_written_, def_data); - PARQUET_THROW_NOT_OK( - def_levels_->Resize(levels_remaining * sizeof(int16_t), false)); - - if (max_rep_level_ > 0) { - std::copy(rep_data + levels_position_, rep_data + levels_written_, rep_data); - PARQUET_THROW_NOT_OK( - rep_levels_->Resize(levels_remaining * sizeof(int16_t), false)); - } - - levels_written_ -= levels_position_; - levels_position_ = 0; - levels_capacity_ = levels_remaining; - } - - records_read_ = 0; - - // Call Finish on the binary builders to reset them - } - - void ResetValues() { - if (values_written_ > 0) { - // Resize to 0, but do not shrink to fit - if (uses_values_) { - PARQUET_THROW_NOT_OK(values_->Resize(0, false)); - } - PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, false)); - values_written_ = 0; - values_capacity_ = 0; - null_count_ = 0; - } - } - - virtual void DebugPrintState() = 0; - - virtual std::vector> GetBuilderChunks() = 0; - - protected: - virtual bool ReadNewPage() = 0; - - const ColumnDescriptor* descr_; - ::arrow::MemoryPool* pool_; - - std::unique_ptr pager_; - std::shared_ptr current_page_; - - // Not set if full schema for this field has no optional or repeated elements - LevelDecoder definition_level_decoder_; - - // Not set for flat schemas. - LevelDecoder repetition_level_decoder_; - - // The total number of values stored in the data page. This is the maximum of - // the number of encoded definition levels or encoded values. For - // non-repeated, required columns, this is equal to the number of encoded - // values. For repeated or optional values, there may be fewer data values - // than levels, and this tells you how many encoded levels there are in that - // case. - int64_t num_buffered_values_; - - // The number of values from the current data page that have been decoded - // into memory - int64_t num_decoded_values_; - - const int16_t max_def_level_; - const int16_t max_rep_level_; - - bool nullable_values_; - - bool at_record_start_; - int64_t records_read_; - - int64_t values_written_; - int64_t values_capacity_; - int64_t null_count_; - - int64_t levels_written_; - int64_t levels_position_; - int64_t levels_capacity_; - - std::shared_ptr<::arrow::ResizableBuffer> values_; - // In the case of false, don't allocate the values buffer (when we directly read into - // builder classes). 
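-  // (For scale, a hypothetical worked example of the geometric growth in
-  // ReserveValues above: with 4096 values already written and room needed
-  // for 5000 more, BitUtil::NextPower2 steps 4097 -> 8192, then 8193 ->
-  // 16384, so a single Resize call lands on 16384 * type_size bytes.)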
- bool uses_values_; - - template - T* ValuesHead() { - return reinterpret_cast(values_->mutable_data()) + values_written_; - } - - std::shared_ptr<::arrow::ResizableBuffer> valid_bits_; - std::shared_ptr<::arrow::ResizableBuffer> def_levels_; - std::shared_ptr<::arrow::ResizableBuffer> rep_levels_; -}; - -template -class TypedRecordReader : public RecordReader::RecordReaderImpl { - public: - using T = typename DType::c_type; - - TypedRecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) - : RecordReader::RecordReaderImpl(descr, pool), current_decoder_(nullptr) {} - - void ResetDecoders() override { decoders_.clear(); } - - virtual void ReadValuesSpaced(int64_t values_with_nulls, int64_t null_count) { - uint8_t* valid_bits = valid_bits_->mutable_data(); - const int64_t valid_bits_offset = values_written_; - - int64_t num_decoded = current_decoder_->DecodeSpaced( - ValuesHead(), static_cast(values_with_nulls), - static_cast(null_count), valid_bits, valid_bits_offset); - DCHECK_EQ(num_decoded, values_with_nulls); - } - - virtual void ReadValuesDense(int64_t values_to_read) { - int64_t num_decoded = - current_decoder_->Decode(ValuesHead(), static_cast(values_to_read)); - DCHECK_EQ(num_decoded, values_to_read); - } - - // Return number of logical records read - int64_t ReadRecordData(int64_t num_records) override { - // Conservative upper bound - const int64_t possible_num_values = - std::max(num_records, levels_written_ - levels_position_); - ReserveValues(possible_num_values); - - const int64_t start_levels_position = levels_position_; - - int64_t values_to_read = 0; - int64_t records_read = 0; - if (max_rep_level_ > 0) { - records_read = DelimitRecords(num_records, &values_to_read); - } else if (max_def_level_ > 0) { - // No repetition levels, skip delimiting logic. 
Each level represents a - // null or not null entry - records_read = std::min(levels_written_ - levels_position_, num_records); - - // This is advanced by DelimitRecords, which we skipped - levels_position_ += records_read; - } else { - records_read = values_to_read = num_records; - } - - int64_t null_count = 0; - if (nullable_values_) { - int64_t values_with_nulls = 0; - internal::DefinitionLevelsToBitmap( - def_levels() + start_levels_position, levels_position_ - start_levels_position, - max_def_level_, max_rep_level_, &values_with_nulls, &null_count, - valid_bits_->mutable_data(), values_written_); - values_to_read = values_with_nulls - null_count; - ReadValuesSpaced(values_with_nulls, null_count); - ConsumeBufferedValues(levels_position_ - start_levels_position); - } else { - ReadValuesDense(values_to_read); - ConsumeBufferedValues(values_to_read); - } - // Total values, including null spaces, if any - values_written_ += values_to_read + null_count; - null_count_ += null_count; - - return records_read; - } - - void DebugPrintState() override { - const int16_t* def_levels = this->def_levels(); - const int16_t* rep_levels = this->rep_levels(); - const int64_t total_levels_read = levels_position_; - - const T* values = reinterpret_cast(this->values()); - - std::cout << "def levels: "; - for (int64_t i = 0; i < total_levels_read; ++i) { - std::cout << def_levels[i] << " "; - } - std::cout << std::endl; - - std::cout << "rep levels: "; - for (int64_t i = 0; i < total_levels_read; ++i) { - std::cout << rep_levels[i] << " "; - } - std::cout << std::endl; - - std::cout << "values: "; - for (int64_t i = 0; i < this->values_written(); ++i) { - std::cout << values[i] << " "; - } - std::cout << std::endl; - } - - std::vector> GetBuilderChunks() override { - throw ParquetException("GetChunks only implemented for binary types"); - } - - protected: - using DecoderType = typename EncodingTraits::Decoder; - - DecoderType* current_decoder_; - - private: - // Map of encoding type to the respective decoder object. For example, a - // column chunk's data pages may include both dictionary-encoded and - // plain-encoded data. - std::unordered_map> decoders_; - - // Initialize repetition and definition level decoders on the next data page. 
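-  // (Usage sketch: the byte count returned here is what ReadNewPage below
-  // passes on to InitializeDataDecoder, i.e.
-  //   int64_t levels_bytes = InitializeLevelDecoders(page, rep_enc, def_enc);
-  //   InitializeDataDecoder(page, levels_bytes);
-  // so value decoding starts at page.data() + levels_bytes; rep_enc and
-  // def_enc are illustrative names for the page's level encodings.)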
- int64_t InitializeLevelDecoders(const DataPage& page, - Encoding::type repetition_level_encoding, - Encoding::type definition_level_encoding); - - void InitializeDataDecoder(const DataPage& page, int64_t levels_bytes); - - // Advance to the next data page - bool ReadNewPage() override; - - void ConfigureDictionary(const DictionaryPage* page); -}; - -class FLBARecordReader : public TypedRecordReader { - public: - FLBARecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) - : TypedRecordReader(descr, pool), builder_(nullptr) { - DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY); - int byte_width = descr_->type_length(); - std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width); - builder_.reset(new ::arrow::FixedSizeBinaryBuilder(type, pool_)); - } - - ::arrow::ArrayVector GetBuilderChunks() override { - std::shared_ptr<::arrow::Array> chunk; - PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); - return ::arrow::ArrayVector({chunk}); - } - - void ReadValuesDense(int64_t values_to_read) override { - auto values = ValuesHead(); - int64_t num_decoded = - current_decoder_->Decode(values, static_cast(values_to_read)); - DCHECK_EQ(num_decoded, values_to_read); - - for (int64_t i = 0; i < num_decoded; i++) { - PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); - } - ResetValues(); - } - - void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { - uint8_t* valid_bits = valid_bits_->mutable_data(); - const int64_t valid_bits_offset = values_written_; - auto values = ValuesHead(); - - int64_t num_decoded = current_decoder_->DecodeSpaced( - values, static_cast(values_to_read), static_cast(null_count), - valid_bits, valid_bits_offset); - DCHECK_EQ(num_decoded, values_to_read); - - for (int64_t i = 0; i < num_decoded; i++) { - if (::arrow::BitUtil::GetBit(valid_bits, valid_bits_offset + i)) { - PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); - } else { - PARQUET_THROW_NOT_OK(builder_->AppendNull()); - } - } - ResetValues(); - } - - private: - std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_; -}; - -class ByteArrayChunkedRecordReader : public TypedRecordReader { - public: - ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) - : TypedRecordReader(descr, pool), builder_(nullptr) { - // ARROW-4688(wesm): Using 2^31 - 1 chunks for now - constexpr int32_t kBinaryChunksize = 2147483647; - DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY); - if (descr_->converted_type() == ConvertedType::UTF8) { - builder_.reset( - new ::arrow::internal::ChunkedStringBuilder(kBinaryChunksize, pool_)); - } else { - builder_.reset( - new ::arrow::internal::ChunkedBinaryBuilder(kBinaryChunksize, pool_)); - } - } - - ::arrow::ArrayVector GetBuilderChunks() override { - ::arrow::ArrayVector chunks; - PARQUET_THROW_NOT_OK(builder_->Finish(&chunks)); - return chunks; - } - - void ReadValuesDense(int64_t values_to_read) override { - int64_t num_decoded = current_decoder_->DecodeArrowNonNull( - static_cast(values_to_read), builder_.get()); - DCHECK_EQ(num_decoded, values_to_read); - ResetValues(); - } - - void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { - int64_t num_decoded = current_decoder_->DecodeArrow( - static_cast(values_to_read), static_cast(null_count), - valid_bits_->mutable_data(), values_written_, builder_.get()); - DCHECK_EQ(num_decoded, values_to_read); - ResetValues(); - } - - private: - std::unique_ptr<::arrow::internal::ChunkedBinaryBuilder> builder_; -}; - 
-template -class ByteArrayDictionaryRecordReader : public TypedRecordReader { - public: - ByteArrayDictionaryRecordReader(const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool) - : TypedRecordReader(descr, pool), builder_(new BuilderType(pool)) {} - - ::arrow::ArrayVector GetBuilderChunks() override { - std::shared_ptr<::arrow::Array> chunk; - PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); - return ::arrow::ArrayVector({chunk}); - } - - void ReadValuesDense(int64_t values_to_read) override { - int64_t num_decoded = current_decoder_->DecodeArrowNonNull( - static_cast(values_to_read), builder_.get()); - DCHECK_EQ(num_decoded, values_to_read); - ResetValues(); - } - - void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { - int64_t num_decoded = current_decoder_->DecodeArrow( - static_cast(values_to_read), static_cast(null_count), - valid_bits_->mutable_data(), values_written_, builder_.get()); - DCHECK_EQ(num_decoded, values_to_read); - ResetValues(); - } - - private: - std::unique_ptr builder_; -}; - -// TODO(wesm): Implement these to some satisfaction -template <> -void TypedRecordReader::DebugPrintState() {} - -template <> -void TypedRecordReader::DebugPrintState() {} - -template <> -void TypedRecordReader::DebugPrintState() {} - -template -inline void TypedRecordReader::ConfigureDictionary(const DictionaryPage* page) { - int encoding = static_cast(page->encoding()); - if (page->encoding() == Encoding::PLAIN_DICTIONARY || - page->encoding() == Encoding::PLAIN) { - encoding = static_cast(Encoding::RLE_DICTIONARY); - } - - auto it = decoders_.find(encoding); - if (it != decoders_.end()) { - throw ParquetException("Column cannot have more than one dictionary."); - } - - if (page->encoding() == Encoding::PLAIN_DICTIONARY || - page->encoding() == Encoding::PLAIN) { - auto dictionary = MakeTypedDecoder(Encoding::PLAIN, descr_); - dictionary->SetData(page->num_values(), page->data(), page->size()); - - // The dictionary is fully decoded during DictionaryDecoder::Init, so the - // DictionaryPage buffer is no longer required after this step - // - // TODO(wesm): investigate whether this all-or-nothing decoding of the - // dictionary makes sense and whether performance can be improved - - std::unique_ptr> decoder = MakeDictDecoder(descr_, pool_); - decoder->SetDict(dictionary.get()); - decoders_[encoding] = - std::unique_ptr(dynamic_cast(decoder.release())); - } else { - ParquetException::NYI("only plain dictionary encoding has been implemented"); - } - - current_decoder_ = decoders_[encoding].get(); - DCHECK(current_decoder_); -} - -// If the data page includes repetition and definition levels, we -// initialize the level decoders and return the number of encoded level bytes. -// The return value helps determine the number of bytes in the encoded data. -template -int64_t TypedRecordReader::InitializeLevelDecoders( - const DataPage& page, Encoding::type repetition_level_encoding, - Encoding::type definition_level_encoding) { - // Read a data page. - num_buffered_values_ = page.num_values(); - - // Have not decoded any values from the data page yet - num_decoded_values_ = 0; - - const uint8_t* buffer = page.data(); - int64_t levels_byte_size = 0; - - // Data page Layout: Repetition Levels - Definition Levels - encoded values. - // Levels are encoded as rle or bit-packed. 
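-  // (A worked example with illustrative sizes: for a page whose data() spans
-  // 100 bytes, 8 bytes of repetition levels followed by 12 bytes of
-  // definition levels give levels_byte_size == 20, and the value decoder is
-  // later handed page.data() + 20 with a remaining size of 80 bytes.)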
- // Init repetition levels - if (descr_->max_repetition_level() > 0) { - int64_t rep_levels_bytes = repetition_level_decoder_.SetData( - repetition_level_encoding, descr_->max_repetition_level(), - static_cast(num_buffered_values_), buffer); - buffer += rep_levels_bytes; - levels_byte_size += rep_levels_bytes; - } - // TODO figure a way to set max_definition_level_ to 0 - // if the initial value is invalid - - // Init definition levels - if (descr_->max_definition_level() > 0) { - int64_t def_levels_bytes = definition_level_decoder_.SetData( - definition_level_encoding, descr_->max_definition_level(), - static_cast(num_buffered_values_), buffer); - levels_byte_size += def_levels_bytes; - } - - return levels_byte_size; -} - -// Get a decoder object for this page or create a new decoder if this is the -// first page with this encoding. -template -void TypedRecordReader::InitializeDataDecoder(const DataPage& page, - int64_t levels_byte_size) { - const uint8_t* buffer = page.data() + levels_byte_size; - const int64_t data_size = page.size() - levels_byte_size; - - Encoding::type encoding = page.encoding(); - - if (IsDictionaryIndexEncoding(encoding)) { - encoding = Encoding::RLE_DICTIONARY; - } - - auto it = decoders_.find(static_cast(encoding)); - if (it != decoders_.end()) { - DCHECK(it->second.get() != nullptr); - if (encoding == Encoding::RLE_DICTIONARY) { - DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY); - } - current_decoder_ = it->second.get(); - } else { - switch (encoding) { - case Encoding::PLAIN: { - auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast(encoding)] = std::move(decoder); - break; - } - case Encoding::RLE_DICTIONARY: - throw ParquetException("Dictionary page must be before data page."); - - case Encoding::DELTA_BINARY_PACKED: - case Encoding::DELTA_LENGTH_BYTE_ARRAY: - case Encoding::DELTA_BYTE_ARRAY: - ParquetException::NYI("Unsupported encoding"); - - default: - throw ParquetException("Unknown encoding type."); - } - } - current_decoder_->SetData(static_cast(num_buffered_values_), buffer, - static_cast(data_size)); -} - -template -bool TypedRecordReader::ReadNewPage() { - // Loop until we find the next data page. - while (true) { - current_page_ = pager_->NextPage(); - if (!current_page_) { - // EOS - return false; - } - - if (current_page_->type() == PageType::DICTIONARY_PAGE) { - ConfigureDictionary(static_cast(current_page_.get())); - continue; - } else if (current_page_->type() == PageType::DATA_PAGE) { - const auto page = std::static_pointer_cast(current_page_); - const int64_t levels_byte_size = InitializeLevelDecoders( - *page, page->repetition_level_encoding(), page->definition_level_encoding()); - InitializeDataDecoder(*page, levels_byte_size); - return true; - } else if (current_page_->type() == PageType::DATA_PAGE_V2) { - const auto page = std::static_pointer_cast(current_page_); - // Repetition and definition levels are always encoded using RLE encoding - // in the DataPageV2 format. - const int64_t levels_byte_size = - InitializeLevelDecoders(*page, Encoding::RLE, Encoding::RLE); - InitializeDataDecoder(*page, levels_byte_size); - return true; - } else { - // We don't know what this page type is. We're allowed to skip non-data - // pages. 
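-      // (e.g. an INDEX_PAGE, or a page type introduced by a newer writer;
-      // skipping rather than throwing keeps this reader forward-compatible
-      // with such files.)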
- continue; - } - } - return true; -} - -std::shared_ptr RecordReader::MakeByteArrayRecordReader( - const ColumnDescriptor* descr, arrow::MemoryPool* pool, bool read_dictionary) { - if (read_dictionary) { - if (descr->converted_type() == ConvertedType::UTF8) { - using Builder = ::arrow::StringDictionaryBuilder; - return std::shared_ptr( - new RecordReader(new ByteArrayDictionaryRecordReader(descr, pool))); - } else { - using Builder = ::arrow::BinaryDictionaryBuilder; - return std::shared_ptr( - new RecordReader(new ByteArrayDictionaryRecordReader(descr, pool))); - } - } else { - return std::shared_ptr( - new RecordReader(new ByteArrayChunkedRecordReader(descr, pool))); - } -} - -std::shared_ptr RecordReader::Make(const ColumnDescriptor* descr, - MemoryPool* pool, - const bool read_dictionary) { - switch (descr->physical_type()) { - case Type::BOOLEAN: - return std::shared_ptr( - new RecordReader(new TypedRecordReader(descr, pool))); - case Type::INT32: - return std::shared_ptr( - new RecordReader(new TypedRecordReader(descr, pool))); - case Type::INT64: - return std::shared_ptr( - new RecordReader(new TypedRecordReader(descr, pool))); - case Type::INT96: - return std::shared_ptr( - new RecordReader(new TypedRecordReader(descr, pool))); - case Type::FLOAT: - return std::shared_ptr( - new RecordReader(new TypedRecordReader(descr, pool))); - case Type::DOUBLE: - return std::shared_ptr( - new RecordReader(new TypedRecordReader(descr, pool))); - case Type::BYTE_ARRAY: - return RecordReader::MakeByteArrayRecordReader(descr, pool, read_dictionary); - case Type::FIXED_LEN_BYTE_ARRAY: - return std::shared_ptr( - new RecordReader(new FLBARecordReader(descr, pool))); - default: { - // PARQUET-1481: This can occur if the file is corrupt - std::stringstream ss; - ss << "Invalid physical column type: " << static_cast(descr->physical_type()); - throw ParquetException(ss.str()); - } - } - // Unreachable code, but supress compiler warning - return nullptr; -} - -// ---------------------------------------------------------------------- -// Implement public API - -RecordReader::RecordReader(RecordReaderImpl* impl) { impl_.reset(impl); } - -RecordReader::~RecordReader() {} - -int64_t RecordReader::ReadRecords(int64_t num_records) { - return impl_->ReadRecords(num_records); -} - -void RecordReader::Reset() { return impl_->Reset(); } - -void RecordReader::Reserve(int64_t num_values) { impl_->Reserve(num_values); } - -const int16_t* RecordReader::def_levels() const { return impl_->def_levels(); } - -const int16_t* RecordReader::rep_levels() const { return impl_->rep_levels(); } - -const uint8_t* RecordReader::values() const { return impl_->values(); } - -std::shared_ptr RecordReader::ReleaseValues() { - return impl_->ReleaseValues(); -} - -std::shared_ptr RecordReader::ReleaseIsValid() { - return impl_->ReleaseIsValid(); -} - -int64_t RecordReader::values_written() const { return impl_->values_written(); } - -int64_t RecordReader::levels_position() const { return impl_->levels_position(); } - -int64_t RecordReader::levels_written() const { return impl_->levels_written(); } - -int64_t RecordReader::null_count() const { return impl_->null_count(); } - -bool RecordReader::nullable_values() const { return impl_->nullable_values(); } - -bool RecordReader::HasMoreData() const { return impl_->HasMoreData(); } - -void RecordReader::SetPageReader(std::unique_ptr reader) { - impl_->SetPageReader(std::move(reader)); -} - -::arrow::ArrayVector RecordReader::GetBuilderChunks() { - return impl_->GetBuilderChunks(); -} - 
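Each removed public method above is a one-line delegation through the privately held impl_ pointer. A minimal, self-contained sketch of that pImpl forwarding shape, using hypothetical Reader/ReaderImpl names rather than the real classes:

#include <cstdint>
#include <memory>

// Hypothetical classes illustrating the pImpl forwarding pattern above.
class ReaderImpl {
 public:
  int64_t ReadRecords(int64_t num_records) { return num_records; }  // stub
};

class Reader {
 public:
  Reader() : impl_(new ReaderImpl()) {}
  // The public method does nothing but delegate to the implementation.
  int64_t ReadRecords(int64_t num_records) {
    return impl_->ReadRecords(num_records);
  }

 private:
  std::unique_ptr<ReaderImpl> impl_;
};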
-void RecordReader::DebugPrintState() { impl_->DebugPrintState(); } - -} // namespace internal -} // namespace parquet diff --git a/cpp/src/parquet/arrow/record_reader.h b/cpp/src/parquet/arrow/record_reader.h deleted file mode 100644 index 2ae26a5a47d..00000000000 --- a/cpp/src/parquet/arrow/record_reader.h +++ /dev/null @@ -1,122 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef PARQUET_RECORD_READER_H -#define PARQUET_RECORD_READER_H - -#include -#include -#include - -#include "parquet/platform.h" - -namespace arrow { - -class Array; - -} // namespace arrow - -namespace parquet { - -class ColumnDescriptor; -class PageReader; - -namespace internal { - -/// \brief Stateful column reader that delimits semantic records for both flat -/// and nested columns -/// -/// \note API EXPERIMENTAL -/// \since 1.3.0 -class RecordReader { - public: - // So that we can create subclasses - class RecordReaderImpl; - - static std::shared_ptr Make( - const ColumnDescriptor* descr, - ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), - const bool read_dictionary = false); - - virtual ~RecordReader(); - - /// \brief Decoded definition levels - const int16_t* def_levels() const; - - /// \brief Decoded repetition levels - const int16_t* rep_levels() const; - - /// \brief Decoded values, including nulls, if any - const uint8_t* values() const; - - /// \brief Attempt to read indicated number of records from column chunk - /// \return number of records read - int64_t ReadRecords(int64_t num_records); - - /// \brief Pre-allocate space for data. Results in better flat read performance - void Reserve(int64_t num_values); - - /// \brief Clear consumed values and repetition/definition levels as the - /// result of calling ReadRecords - void Reset(); - - std::shared_ptr ReleaseValues(); - std::shared_ptr ReleaseIsValid(); - - /// \brief Number of values written including nulls (if any) - int64_t values_written() const; - - /// \brief Number of definition / repetition levels (from those that have - /// been decoded) that have been consumed inside the reader. 
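The deleted header's accounting distinguishes levels already decoded into the buffer (levels_written_) from levels already consumed (levels_position_); the declarations follow below. A compact, purely hypothetical sketch of that two-cursor discipline, including the shift-down of the unconsumed tail that Reset() performs in the implementation:

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical two-cursor level buffer, simplified from the reader's design:
// levels are appended at written_, consumed at position_, and Reset() moves
// the unconsumed tail back to the front of the buffer.
class LevelBuffer {
 public:
  void Write(const int16_t* levels, int64_t n) {
    data_.insert(data_.end(), levels, levels + n);
    written_ += n;
  }

  int64_t Consume(int64_t n) {
    const int64_t consumed = std::min(n, written_ - position_);
    position_ += consumed;
    return consumed;
  }

  void Reset() {
    data_.erase(data_.begin(), data_.begin() + position_);
    written_ -= position_;
    position_ = 0;
  }

 private:
  std::vector<int16_t> data_;
  int64_t written_ = 0;
  int64_t position_ = 0;
};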
- int64_t levels_position() const; - - /// \brief Number of definition / repetition levels that have been written - /// internally in the reader - int64_t levels_written() const; - - /// \brief Number of nulls in the leaf - int64_t null_count() const; - - /// \brief True if the leaf values are nullable - bool nullable_values() const; - - /// \brief Return true if the record reader has more internal data yet to - /// process - bool HasMoreData() const; - - /// \brief Advance record reader to the next row group - /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader - void SetPageReader(std::unique_ptr reader); - - void DebugPrintState(); - - // For BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY types that may have chunked output - std::vector> GetBuilderChunks(); - - private: - std::unique_ptr impl_; - explicit RecordReader(RecordReaderImpl* impl); - - static std::shared_ptr MakeByteArrayRecordReader( - const ColumnDescriptor* descr, ::arrow::MemoryPool* pool, - const bool read_dictionary); -}; - -} // namespace internal -} // namespace parquet - -#endif // PARQUET_RECORD_READER_H diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 9284c8c8024..f77bf38f9e2 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -134,23 +134,22 @@ static Status MakeArrowTime64(const LogicalType& logical_type, static Status MakeArrowTimestamp(const LogicalType& logical_type, std::shared_ptr* out) { - static const char* utc = "UTC"; const auto& timestamp = checked_cast(logical_type); + const bool utc_normalized = + timestamp.is_from_converted_type() ? false : timestamp.is_adjusted_to_utc(); + static const char* utc_timezone = "UTC"; switch (timestamp.time_unit()) { case LogicalType::TimeUnit::MILLIS: - *out = (timestamp.is_adjusted_to_utc() - ? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc) - : ::arrow::timestamp(::arrow::TimeUnit::MILLI)); + *out = (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MILLI, utc_timezone) + : ::arrow::timestamp(::arrow::TimeUnit::MILLI)); break; case LogicalType::TimeUnit::MICROS: - *out = (timestamp.is_adjusted_to_utc() - ? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc) - : ::arrow::timestamp(::arrow::TimeUnit::MICRO)); + *out = (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::MICRO, utc_timezone) + : ::arrow::timestamp(::arrow::TimeUnit::MICRO)); break; case LogicalType::TimeUnit::NANOS: - *out = (timestamp.is_adjusted_to_utc() - ? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc) - : ::arrow::timestamp(::arrow::TimeUnit::NANO)); + *out = (utc_normalized ? ::arrow::timestamp(::arrow::TimeUnit::NANO, utc_timezone) + : ::arrow::timestamp(::arrow::TimeUnit::NANO)); break; default: return Status::TypeError("Unrecognized time unit in timestamp logical_type: ", @@ -521,11 +520,21 @@ Status StructToNode(const std::shared_ptr<::arrow::StructType>& type, static std::shared_ptr TimestampLogicalTypeFromArrowTimestamp( const ::arrow::TimestampType& timestamp_type, ::arrow::TimeUnit::type time_unit) { const bool utc = !(timestamp_type.timezone().empty()); + // ARROW-5878(wesm): for forward compatibility reasons, and because + // there's no other way to signal to old readers that values are + // timestamps, we force the ConvertedType field to be set to the + // corresponding TIMESTAMP_* value. This does cause some ambiguity + // as Parquet readers have not been consistent about the + // interpretation of TIMESTAMP_* values as being UTC-normalized. 
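As a rough sketch of the policy this comment describes (illustrative stand-in types only, not the Arrow/Parquet API): the UTC flag comes from timezone presence, and the legacy ConvertedType can only be forced for milli/micro units, since nanosecond timestamps never had a ConvertedType equivalent. The switch that follows then applies it per time unit.

#include <string>

// Illustrative stand-ins; the real code uses Arrow/Parquet enum types.
enum class Unit { MILLI, MICRO, NANO };

struct TimestampPolicy {
  bool adjusted_to_utc;       // derived from timezone presence
  bool force_converted_type;  // legacy TIMESTAMP_MILLIS/MICROS annotation
};

TimestampPolicy ChoosePolicy(const std::string& timezone, Unit unit) {
  // Any non-empty timezone is treated as UTC-normalized storage.
  const bool utc = !timezone.empty();
  // Only milli/micro have a ConvertedType that old readers understand.
  const bool force = (unit == Unit::MILLI || unit == Unit::MICRO);
  return TimestampPolicy{utc, force};
}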
switch (time_unit) { case ::arrow::TimeUnit::MILLI: - return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MILLIS); + return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MILLIS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true); case ::arrow::TimeUnit::MICRO: - return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MICROS); + return LogicalType::Timestamp(utc, LogicalType::TimeUnit::MICROS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true); case ::arrow::TimeUnit::NANO: return LogicalType::Timestamp(utc, LogicalType::TimeUnit::NANOS); case ::arrow::TimeUnit::SECOND: diff --git a/cpp/src/parquet/arrow/test-util.h b/cpp/src/parquet/arrow/test-util.h index b99e28f5e03..8760d91f2a9 100644 --- a/cpp/src/parquet/arrow/test-util.h +++ b/cpp/src/parquet/arrow/test-util.h @@ -30,7 +30,7 @@ #include "arrow/type_traits.h" #include "arrow/util/decimal.h" -#include "parquet/arrow/record_reader.h" +#include "parquet/column_reader.h" namespace parquet { @@ -429,26 +429,11 @@ Status MakeEmptyListsArray(int64_t size, std::shared_ptr* out_array) { return Status::OK(); } -static inline std::shared_ptr<::arrow::Column> MakeColumn( - const std::string& name, const std::shared_ptr& array, bool nullable) { - auto field = ::arrow::field(name, array->type(), nullable); - return std::make_shared<::arrow::Column>(field, array); -} - -static inline std::shared_ptr<::arrow::Column> MakeColumn( - const std::string& name, const std::vector>& arrays, - bool nullable) { - auto field = ::arrow::field(name, arrays[0]->type(), nullable); - return std::make_shared<::arrow::Column>(field, arrays); -} - std::shared_ptr<::arrow::Table> MakeSimpleTable(const std::shared_ptr& values, bool nullable) { - std::shared_ptr<::arrow::Column> column = MakeColumn("col", values, nullable); - std::vector> columns({column}); - std::vector> fields({column->field()}); - auto schema = std::make_shared<::arrow::Schema>(fields); - return ::arrow::Table::Make(schema, columns); + auto carr = std::make_shared<::arrow::ChunkedArray>(values); + auto schema = ::arrow::schema({::arrow::field("col", values->type(), nullable)}); + return ::arrow::Table::Make(schema, {carr}); } template diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 91811203f92..fd40319fde8 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -49,8 +49,7 @@ using arrow::ChunkedArray; using arrow::Decimal128Array; using arrow::Field; using arrow::FixedSizeBinaryArray; -using arrow::Int16Array; -using arrow::Int16Builder; +using Int16BufferBuilder = arrow::TypedBufferBuilder; using arrow::ListArray; using arrow::MemoryPool; using arrow::NumericArray; @@ -81,8 +80,7 @@ namespace { class LevelBuilder { public: - explicit LevelBuilder(MemoryPool* pool) - : def_levels_(::arrow::int16(), pool), rep_levels_(::arrow::int16(), pool) {} + explicit LevelBuilder(MemoryPool* pool) : def_levels_(pool), rep_levels_(pool) {} Status VisitInline(const Array& array); @@ -102,6 +100,7 @@ class LevelBuilder { null_counts_.push_back(array.null_count()); offsets_.push_back(array.raw_value_offsets()); + // Min offset isn't always zero in the case of sliced Arrays. min_offset_idx_ = array.value_offset(min_offset_idx_); max_offset_idx_ = array.value_offset(max_offset_idx_); @@ -176,18 +175,17 @@ class LevelBuilder { } *num_levels = array.length(); } else { + // Note it is hard to estimate memory consumption due to zero length + // arrays otherwise we would preallocate. 
An upper bound on memory + // is the sum of the lengths of each list array plus the number of elements, + // but this might be too loose an upper bound, so we choose to use + // safe methods. RETURN_NOT_OK(rep_levels_.Append(0)); RETURN_NOT_OK(HandleListEntries(0, 0, 0, array.length())); - std::shared_ptr<Array> def_levels_array; - std::shared_ptr<Array> rep_levels_array; - - RETURN_NOT_OK(def_levels_.Finish(&def_levels_array)); - RETURN_NOT_OK(rep_levels_.Finish(&rep_levels_array)); - - *def_levels_out = static_cast<Int16Array*>(def_levels_array.get())->values(); - *rep_levels_out = static_cast<Int16Array*>(rep_levels_array.get())->values(); - *num_levels = rep_levels_array->length(); + RETURN_NOT_OK(def_levels_.Finish(def_levels_out)); + RETURN_NOT_OK(rep_levels_.Finish(rep_levels_out)); + *num_levels = (*rep_levels_out)->size() / sizeof(int16_t); } return Status::OK(); @@ -217,36 +215,37 @@ class LevelBuilder { return HandleListEntries(static_cast<int16_t>(def_level + 1), static_cast<int16_t>(rep_level + 1), inner_offset, inner_length); - } else { - // We have reached the leaf: primitive list, handle remaining nullables - const bool nullable_level = nullable_[recursion_level]; - const int64_t level_null_count = null_counts_[recursion_level]; - const uint8_t* level_valid_bitmap = valid_bitmaps_[recursion_level]; - - for (int64_t i = 0; i < inner_length; i++) { - if (i > 0) { - RETURN_NOT_OK(rep_levels_.Append(static_cast<int16_t>(rep_level + 1))); - } - if (level_null_count && level_valid_bitmap == nullptr) { - // Special case: this is a null array (all elements are null) - RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 1))); - } else if (nullable_level && - ((level_null_count == 0) || - BitUtil::GetBit( - level_valid_bitmap, - inner_offset + i + array_offsets_[recursion_level]))) { - // Non-null element in a null level - RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 2))); - } else { - // This can be produced in two case: - // * elements are nullable and this one is null (i.e. max_def_level = def_level - // + 2) - // * elements are non-nullable (i.e. max_def_level = def_level + 1) - RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 1))); - } + } + // We have reached the leaf: primitive list, handle remaining nullables + const bool nullable_level = nullable_[recursion_level]; + const int64_t level_null_count = null_counts_[recursion_level]; + const uint8_t* level_valid_bitmap = valid_bitmaps_[recursion_level]; + + if (inner_length >= 1) { + RETURN_NOT_OK( + rep_levels_.Append(inner_length - 1, static_cast<int16_t>(rep_level + 1))); + } + + // Special case: this is a null array (all elements are null) + if (level_null_count && level_valid_bitmap == nullptr) { + return def_levels_.Append(inner_length, static_cast<int16_t>(def_level + 1)); + } + for (int64_t i = 0; i < inner_length; i++) { + if (nullable_level && + ((level_null_count == 0) || + BitUtil::GetBit(level_valid_bitmap, + inner_offset + i + array_offsets_[recursion_level]))) { + // Non-null element in a null level + RETURN_NOT_OK(def_levels_.Append(static_cast<int16_t>(def_level + 2))); + } else { + // This can be produced in two cases: + // * elements are nullable and this one is null + // (i.e. max_def_level = def_level + 2) + // * elements are non-nullable (i.e.
max_def_level = def_level + 1) + RETURN_NOT_OK(def_levels_.Append(static_cast(def_level + 1))); } - return Status::OK(); } + return Status::OK(); } Status HandleListEntries(int16_t def_level, int16_t rep_level, int64_t offset, @@ -261,8 +260,8 @@ class LevelBuilder { } private: - Int16Builder def_levels_; - Int16Builder rep_levels_; + Int16BufferBuilder def_levels_; + Int16BufferBuilder rep_levels_; std::vector null_counts_; std::vector valid_bitmaps_; @@ -307,7 +306,7 @@ struct ColumnWriterContext { Status GetLeafType(const ::arrow::DataType& type, ::arrow::Type::type* leaf_type) { if (type.id() == ::arrow::Type::LIST || type.id() == ::arrow::Type::STRUCT) { if (type.num_children() != 1) { - return Status::Invalid("Nested column branch had multiple children"); + return Status::Invalid("Nested column branch had multiple children: ", type); } return GetLeafType(*type.child(0)->type(), leaf_type); } else { @@ -1195,8 +1194,7 @@ Status FileWriter::WriteTable(const Table& table, int64_t chunk_size) { auto WriteRowGroup = [&](int64_t offset, int64_t size) { RETURN_NOT_OK(NewRowGroup(size)); for (int i = 0; i < table.num_columns(); i++) { - auto chunked_data = table.column(i)->data(); - RETURN_NOT_OK(WriteColumnChunk(chunked_data, offset, size)); + RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size)); } return Status::OK(); }; diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h index 8014e1a3511..6906e4635ae 100644 --- a/cpp/src/parquet/arrow/writer.h +++ b/cpp/src/parquet/arrow/writer.h @@ -211,8 +211,10 @@ inline void ArrowTimestampToImpalaTimestamp(const int64_t time, Int96* impala_ti (*impala_timestamp).value[2] = (uint32_t)julian_days; int64_t last_day_units = time % UnitPerDay; - int64_t* impala_last_day_nanos = reinterpret_cast(impala_timestamp); - *impala_last_day_nanos = last_day_units * NanosecondsPerUnit; + auto last_day_nanos = last_day_units * NanosecondsPerUnit; + // impala_timestamp will be unaligned every other entry so do memcpy instead + // of assign and reinterpret cast to avoid undefined behavior. + std::memcpy(impala_timestamp, &last_day_nanos, sizeof(int64_t)); } constexpr int64_t kSecondsInNanos = INT64_C(1000000000); diff --git a/cpp/src/parquet/column_reader-test.cc b/cpp/src/parquet/column_reader-test.cc index 46d099ed9c7..b6b928c51cc 100644 --- a/cpp/src/parquet/column_reader-test.cc +++ b/cpp/src/parquet/column_reader-test.cc @@ -410,5 +410,27 @@ TEST(TestColumnReader, DefinitionLevelsToBitmap) { ASSERT_EQ(current_byte, valid_bits[1]); } +TEST(TestColumnReader, DefinitionLevelsToBitmapPowerOfTwo) { + // PARQUET-1623: Invalid memory access when decoding a valid bits vector that has a + // length equal to a power of two and also using a non-zero valid_bits_offset. This + // should not fail when run with ASAN or valgrind. 
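The test body below drives DefinitionLevelsToBitmap with a non-zero offset into the validity bitmap. The leaf-level rule it verifies can be restated in a few lines (a hypothetical helper, ignoring the empty-list subtleties that repetition levels add): a definition level equal to the maximum marks a present value, anything lower marks a null.

#include <cstdint>
#include <vector>

// Hypothetical restatement of the rule exercised by the regression test below.
void LevelsToValidity(const std::vector<int16_t>& def_levels,
                      int16_t max_def_level, std::vector<bool>* valid_bits,
                      int64_t* null_count) {
  *null_count = 0;
  for (int16_t level : def_levels) {
    const bool is_valid = (level == max_def_level);
    valid_bits->push_back(is_valid);
    if (!is_valid) ++(*null_count);
  }
}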
+ std::vector def_levels = {3, 3, 3, 2, 3, 3, 3, 3}; + std::vector rep_levels = {0, 1, 1, 1, 1, 1, 1, 1}; + std::vector valid_bits(1, 0); + + const int max_def_level = 3; + const int max_rep_level = 1; + + int64_t values_read = -1; + int64_t null_count = 0; + + // Read the latter half of the validity bitmap + internal::DefinitionLevelsToBitmap(def_levels.data() + 4, 4, max_def_level, + max_rep_level, &values_read, &null_count, + valid_bits.data(), 4 /* valid_bits_offset */); + ASSERT_EQ(4, values_read); + ASSERT_EQ(0, null_count); +} + } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index f66224edd47..6727fe6fcd0 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -21,12 +21,15 @@ #include #include #include +#include #include "arrow/buffer.h" +#include "arrow/builder.h" #include "arrow/util/bit-stream-utils.h" #include "arrow/util/compression.h" #include "arrow/util/logging.h" #include "arrow/util/rle-encoding.h" +#include "arrow/util/ubsan.h" #include "parquet/column_page.h" #include "parquet/encoding.h" @@ -50,7 +53,7 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level, bit_width_ = BitUtil::Log2(max_level + 1); switch (encoding) { case Encoding::RLE: { - num_bytes = *reinterpret_cast(data); + num_bytes = arrow::util::SafeLoadAs(data); const uint8_t* decoder_data = data + sizeof(int32_t); if (!rle_decoder_) { rle_decoder_.reset( @@ -263,33 +266,63 @@ std::unique_ptr PageReader::Open( } // ---------------------------------------------------------------------- -// TypedColumnReader implementations +// Impl base class for TypedColumnReader and RecordReader + +// PLAIN_DICTIONARY is deprecated but used to be used as a dictionary index +// encoding. +static bool IsDictionaryIndexEncoding(const Encoding::type& e) { + return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY; +} template -class TypedColumnReaderImpl : public TypedColumnReader { +class ColumnReaderImplBase { public: using T = typename DType::c_type; - TypedColumnReaderImpl(const ColumnDescriptor* descr, std::unique_ptr pager, - ::arrow::MemoryPool* pool) + ColumnReaderImplBase(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) : descr_(descr), - pager_(std::move(pager)), + max_def_level_(descr->max_definition_level()), + max_rep_level_(descr->max_repetition_level()), num_buffered_values_(0), num_decoded_values_(0), pool_(pool), - current_decoder_(NULLPTR) {} + current_decoder_(nullptr) {} - int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - T* values, int64_t* values_read) override; + virtual ~ColumnReaderImplBase() = default; - int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - T* values, uint8_t* valid_bits, int64_t valid_bits_offset, - int64_t* levels_read, int64_t* values_read, - int64_t* null_count) override; + protected: + // Read up to batch_size values from the current data page into the + // pre-allocated memory T* + // + // @returns: the number of values read into the out buffer + int64_t ReadValues(int64_t batch_size, T* out) { + int64_t num_decoded = current_decoder_->Decode(out, static_cast(batch_size)); + return num_decoded; + } - int64_t Skip(int64_t num_rows_to_skip) override; + // Read up to batch_size values from the current data page into the + // pre-allocated memory T*, leaving spaces for null entries according + // to the def_levels. 
+ // + // @returns: the number of values read into the out buffer + int64_t ReadValuesSpaced(int64_t batch_size, T* out, int64_t null_count, + uint8_t* valid_bits, int64_t valid_bits_offset) { + return current_decoder_->DecodeSpaced(out, static_cast(batch_size), + static_cast(null_count), valid_bits, + valid_bits_offset); + } + + // Read multiple definition levels into preallocated memory + // + // Returns the number of decoded definition levels + int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { + if (max_def_level_ == 0) { + return 0; + } + return definition_level_decoder_.Decode(static_cast(batch_size), levels); + } - bool HasNext() override { + bool HasNextInternal() { // Either there is no data page available yet, or the data page has been // exhausted if (num_buffered_values_ == 0 || num_decoded_values_ == num_buffered_values_) { @@ -300,42 +333,173 @@ class TypedColumnReaderImpl : public TypedColumnReader { return true; } - Type::type type() const override { return descr_->physical_type(); } - - const ColumnDescriptor* descr() const override { return descr_; } - - protected: - using DecoderType = TypedDecoder; + // Read multiple repetition levels into preallocated memory + // Returns the number of decoded repetition levels + int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { + if (max_rep_level_ == 0) { + return 0; + } + return repetition_level_decoder_.Decode(static_cast(batch_size), levels); + } // Advance to the next data page - bool ReadNewPage(); + bool ReadNewPage() { + // Loop until we find the next data page. + while (true) { + current_page_ = pager_->NextPage(); + if (!current_page_) { + // EOS + return false; + } - // Read multiple definition levels into preallocated memory - // - // Returns the number of decoded definition levels - int64_t ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { - if (descr_->max_definition_level() == 0) { - return 0; + if (current_page_->type() == PageType::DICTIONARY_PAGE) { + ConfigureDictionary(static_cast(current_page_.get())); + continue; + } else if (current_page_->type() == PageType::DATA_PAGE) { + const auto page = std::static_pointer_cast(current_page_); + const int64_t levels_byte_size = InitializeLevelDecoders( + *page, page->repetition_level_encoding(), page->definition_level_encoding()); + InitializeDataDecoder(*page, levels_byte_size); + return true; + } else if (current_page_->type() == PageType::DATA_PAGE_V2) { + const auto page = std::static_pointer_cast(current_page_); + // Repetition and definition levels are always encoded using RLE encoding + // in the DataPageV2 format. + const int64_t levels_byte_size = + InitializeLevelDecoders(*page, Encoding::RLE, Encoding::RLE); + InitializeDataDecoder(*page, levels_byte_size); + return true; + } else { + // We don't know what this page type is. We're allowed to skip non-data + // pages. 
+ continue; + } } - return definition_level_decoder_.Decode(static_cast(batch_size), levels); + return true; } - // Read multiple repetition levels into preallocated memory - // Returns the number of decoded repetition levels - int64_t ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { - if (descr_->max_repetition_level() == 0) { - return 0; + void ConfigureDictionary(const DictionaryPage* page) { + int encoding = static_cast(page->encoding()); + if (page->encoding() == Encoding::PLAIN_DICTIONARY || + page->encoding() == Encoding::PLAIN) { + encoding = static_cast(Encoding::RLE_DICTIONARY); } - return repetition_level_decoder_.Decode(static_cast(batch_size), levels); + + auto it = decoders_.find(encoding); + if (it != decoders_.end()) { + throw ParquetException("Column cannot have more than one dictionary."); + } + + if (page->encoding() == Encoding::PLAIN_DICTIONARY || + page->encoding() == Encoding::PLAIN) { + auto dictionary = MakeTypedDecoder(Encoding::PLAIN, descr_); + dictionary->SetData(page->num_values(), page->data(), page->size()); + + // The dictionary is fully decoded during DictionaryDecoder::Init, so the + // DictionaryPage buffer is no longer required after this step + // + // TODO(wesm): investigate whether this all-or-nothing decoding of the + // dictionary makes sense and whether performance can be improved + + std::unique_ptr> decoder = MakeDictDecoder(descr_, pool_); + decoder->SetDict(dictionary.get()); + decoders_[encoding] = + std::unique_ptr(dynamic_cast(decoder.release())); + } else { + ParquetException::NYI("only plain dictionary encoding has been implemented"); + } + + current_decoder_ = decoders_[encoding].get(); + DCHECK(current_decoder_); } - int64_t available_values_current_page() const { - return num_buffered_values_ - num_decoded_values_; + // Initialize repetition and definition level decoders on the next data page. + + // If the data page includes repetition and definition levels, we + // initialize the level decoders and return the number of encoded level bytes. + // The return value helps determine the number of bytes in the encoded data. + int64_t InitializeLevelDecoders(const DataPage& page, + Encoding::type repetition_level_encoding, + Encoding::type definition_level_encoding) { + // Read a data page. + num_buffered_values_ = page.num_values(); + + // Have not decoded any values from the data page yet + num_decoded_values_ = 0; + + const uint8_t* buffer = page.data(); + int64_t levels_byte_size = 0; + + // Data page Layout: Repetition Levels - Definition Levels - encoded values. + // Levels are encoded as rle or bit-packed. + // Init repetition levels + if (max_rep_level_ > 0) { + int64_t rep_levels_bytes = repetition_level_decoder_.SetData( + repetition_level_encoding, max_rep_level_, + static_cast(num_buffered_values_), buffer); + buffer += rep_levels_bytes; + levels_byte_size += rep_levels_bytes; + } + // TODO figure a way to set max_def_level_ to 0 + // if the initial value is invalid + + // Init definition levels + if (max_def_level_ > 0) { + int64_t def_levels_bytes = definition_level_decoder_.SetData( + definition_level_encoding, max_def_level_, + static_cast(num_buffered_values_), buffer); + levels_byte_size += def_levels_bytes; + } + + return levels_byte_size; } - void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; } + // Get a decoder object for this page or create a new decoder if this is the + // first page with this encoding. 
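InitializeLevelDecoders above walks the front of a V1 data page: an optional RLE run of repetition levels, then an optional run of definition levels, each prefixed by a 4-byte little-endian length (the same prefix LevelDecoder::SetData reads), with the encoded values occupying the remainder. A self-contained sketch of that slicing under those assumptions (illustrative names, not the library's API); the decoder-selection routine the comment above introduces then follows:

#include <cstdint>
#include <cstring>

// Illustrative only: slice a DataPage V1 buffer into level runs and values.
struct PageSlices {
  const uint8_t* rep_run = nullptr;
  int32_t rep_size = 0;
  const uint8_t* def_run = nullptr;
  int32_t def_size = 0;
  const uint8_t* values = nullptr;
};

PageSlices SlicePageV1(const uint8_t* page, bool has_rep_levels,
                       bool has_def_levels) {
  PageSlices slices;
  const uint8_t* cursor = page;
  if (has_rep_levels) {
    std::memcpy(&slices.rep_size, cursor, sizeof(int32_t));  // unaligned-safe
    slices.rep_run = cursor + sizeof(int32_t);
    cursor = slices.rep_run + slices.rep_size;
  }
  if (has_def_levels) {
    std::memcpy(&slices.def_size, cursor, sizeof(int32_t));
    slices.def_run = cursor + sizeof(int32_t);
    cursor = slices.def_run + slices.def_size;
  }
  slices.values = cursor;  // remainder of the page is the encoded values
  return slices;
}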
+ void InitializeDataDecoder(const DataPage& page, int64_t levels_byte_size) { + const uint8_t* buffer = page.data() + levels_byte_size; + const int64_t data_size = page.size() - levels_byte_size; + + Encoding::type encoding = page.encoding(); + + if (IsDictionaryIndexEncoding(encoding)) { + encoding = Encoding::RLE_DICTIONARY; + } + + auto it = decoders_.find(static_cast(encoding)); + if (it != decoders_.end()) { + DCHECK(it->second.get() != nullptr); + if (encoding == Encoding::RLE_DICTIONARY) { + DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY); + } + current_decoder_ = it->second.get(); + } else { + switch (encoding) { + case Encoding::PLAIN: { + auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr_); + current_decoder_ = decoder.get(); + decoders_[static_cast(encoding)] = std::move(decoder); + break; + } + case Encoding::RLE_DICTIONARY: + throw ParquetException("Dictionary page must be before data page."); + + case Encoding::DELTA_BINARY_PACKED: + case Encoding::DELTA_LENGTH_BYTE_ARRAY: + case Encoding::DELTA_BYTE_ARRAY: + ParquetException::NYI("Unsupported encoding"); + + default: + throw ParquetException("Unknown encoding type."); + } + } + current_decoder_->SetData(static_cast(num_buffered_values_), buffer, + static_cast(data_size)); + } const ColumnDescriptor* descr_; + const int16_t max_def_level_; + const int16_t max_rep_level_; std::unique_ptr pager_; std::shared_ptr current_page_; @@ -360,44 +524,48 @@ class TypedColumnReaderImpl : public TypedColumnReader { ::arrow::MemoryPool* pool_; - // Read up to batch_size values from the current data page into the - // pre-allocated memory T* - // - // @returns: the number of values read into the out buffer - int64_t ReadValues(int64_t batch_size, T* out); - - // Read up to batch_size values from the current data page into the - // pre-allocated memory T*, leaving spaces for null entries according - // to the def_levels. - // - // @returns: the number of values read into the out buffer - int64_t ReadValuesSpaced(int64_t batch_size, T* out, int64_t null_count, - uint8_t* valid_bits, int64_t valid_bits_offset); + using DecoderType = typename EncodingTraits::Decoder; + DecoderType* current_decoder_; // Map of encoding type to the respective decoder object. For example, a // column chunk's data pages may include both dictionary-encoded and // plain-encoded data. 
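The cache that comment describes, declared just below, maps an encoding id to a lazily created decoder that later pages with the same encoding reuse. Reduced to its essentials (FakeDecoder is a stand-in; the real map stores typed Parquet decoders):

#include <memory>
#include <unordered_map>

// Stand-in for a real decoder; illustration only.
struct FakeDecoder {
  explicit FakeDecoder(int encoding) : encoding(encoding) {}
  int encoding;
};

class DecoderCache {
 public:
  // Reuse the decoder for an encoding if one exists, else create it once.
  FakeDecoder* GetOrCreate(int encoding) {
    auto it = cache_.find(encoding);
    if (it != cache_.end()) return it->second.get();
    auto inserted = cache_.emplace(
        encoding, std::unique_ptr<FakeDecoder>(new FakeDecoder(encoding)));
    return inserted.first->second.get();
  }

 private:
  std::unordered_map<int, std::unique_ptr<FakeDecoder>> cache_;
};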
std::unordered_map> decoders_; - void ConfigureDictionary(const DictionaryPage* page); - DecoderType* current_decoder_; + void ConsumeBufferedValues(int64_t num_values) { num_decoded_values_ += num_values; } }; -template -int64_t TypedColumnReaderImpl::ReadValues(int64_t batch_size, T* out) { - int64_t num_decoded = current_decoder_->Decode(out, static_cast(batch_size)); - return num_decoded; -} +// ---------------------------------------------------------------------- +// TypedColumnReader implementations template -int64_t TypedColumnReaderImpl::ReadValuesSpaced(int64_t batch_size, T* out, - int64_t null_count, - uint8_t* valid_bits, - int64_t valid_bits_offset) { - return current_decoder_->DecodeSpaced(out, static_cast(batch_size), - static_cast(null_count), valid_bits, - valid_bits_offset); -} +class TypedColumnReaderImpl : public TypedColumnReader, + public ColumnReaderImplBase { + public: + using T = typename DType::c_type; + + TypedColumnReaderImpl(const ColumnDescriptor* descr, std::unique_ptr pager, + ::arrow::MemoryPool* pool) + : ColumnReaderImplBase(descr, pool) { + this->pager_ = std::move(pager); + } + + bool HasNext() override { return this->HasNextInternal(); } + + int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, int64_t* values_read) override; + + int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, uint8_t* valid_bits, int64_t valid_bits_offset, + int64_t* levels_read, int64_t* values_read, + int64_t* null_count) override; + + int64_t Skip(int64_t num_rows_to_skip) override; + + Type::type type() const override { return this->descr_->physical_type(); } + + const ColumnDescriptor* descr() const override { return this->descr_; } +}; template int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def_levels, @@ -411,7 +579,8 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished - batch_size = std::min(batch_size, num_buffered_values_ - num_decoded_values_); + batch_size = + std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); int64_t num_def_levels = 0; int64_t num_rep_levels = 0; @@ -419,12 +588,12 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def int64_t values_to_read = 0; // If the field is required and non-repeated, there are no definition levels - if (descr_->max_definition_level() > 0 && def_levels) { - num_def_levels = ReadDefinitionLevels(batch_size, def_levels); + if (this->max_def_level_ > 0 && def_levels) { + num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); // TODO(wesm): this tallying of values-to-decode can be performed with better // cache-efficiency if fused with the level decoding. 
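The TODO above notes that this tally could be fused with level decoding; even unfused, the count can be written branch-free, which compilers typically auto-vectorize. A hedged sketch of that variant (illustrative helper, not the library's); the current straightforward loop follows:

#include <cstdint>

// Illustrative branch-free count of levels equal to the maximum; the
// comparison result (0 or 1) is accumulated instead of branched on.
inline int64_t CountMaxLevels(const int16_t* levels, int64_t num_levels,
                              int16_t max_level) {
  int64_t count = 0;
  for (int64_t i = 0; i < num_levels; ++i) {
    count += static_cast<int64_t>(levels[i] == max_level);
  }
  return count;
}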
for (int64_t i = 0; i < num_def_levels; ++i) { - if (def_levels[i] == descr_->max_definition_level()) { + if (def_levels[i] == this->max_def_level_) { ++values_to_read; } } @@ -434,16 +603,16 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def } // Not present for non-repeated fields - if (descr_->max_repetition_level() > 0 && rep_levels) { - num_rep_levels = ReadRepetitionLevels(batch_size, rep_levels); + if (this->max_rep_level_ > 0 && rep_levels) { + num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); if (def_levels && num_def_levels != num_rep_levels) { throw ParquetException("Number of decoded rep / def levels did not match"); } } - *values_read = ReadValues(values_to_read, values); + *values_read = this->ReadValues(values_to_read, values); int64_t total_values = std::max(num_def_levels, *values_read); - ConsumeBufferedValues(total_values); + this->ConsumeBufferedValues(total_values); return total_values; } @@ -464,50 +633,50 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( int64_t total_values; // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished - batch_size = std::min(batch_size, num_buffered_values_ - num_decoded_values_); + batch_size = + std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); // If the field is required and non-repeated, there are no definition levels - if (descr_->max_definition_level() > 0) { - int64_t num_def_levels = ReadDefinitionLevels(batch_size, def_levels); + if (this->max_def_level_ > 0) { + int64_t num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); // Not present for non-repeated fields - if (descr_->max_repetition_level() > 0) { - int64_t num_rep_levels = ReadRepetitionLevels(batch_size, rep_levels); + if (this->max_rep_level_ > 0) { + int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); if (num_def_levels != num_rep_levels) { throw ParquetException("Number of decoded rep / def levels did not match"); } } - const bool has_spaced_values = internal::HasSpacedValues(descr_); + const bool has_spaced_values = internal::HasSpacedValues(this->descr_); int64_t null_count = 0; if (!has_spaced_values) { int values_to_read = 0; for (int64_t i = 0; i < num_def_levels; ++i) { - if (def_levels[i] == descr_->max_definition_level()) { + if (def_levels[i] == this->max_def_level_) { ++values_to_read; } } - total_values = ReadValues(values_to_read, values); + total_values = this->ReadValues(values_to_read, values); for (int64_t i = 0; i < total_values; i++) { ::arrow::BitUtil::SetBit(valid_bits, valid_bits_offset + i); } *values_read = total_values; } else { - int16_t max_definition_level = descr_->max_definition_level(); - int16_t max_repetition_level = descr_->max_repetition_level(); - internal::DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level, - max_repetition_level, values_read, &null_count, + internal::DefinitionLevelsToBitmap(def_levels, num_def_levels, this->max_def_level_, + this->max_rep_level_, values_read, &null_count, valid_bits, valid_bits_offset); - total_values = ReadValuesSpaced(*values_read, values, static_cast(null_count), - valid_bits, valid_bits_offset); + total_values = + this->ReadValuesSpaced(*values_read, values, static_cast(null_count), + valid_bits, valid_bits_offset); } *levels_read = num_def_levels; *null_count_out = null_count; } else { // Required field, read all values - total_values = ReadValues(batch_size, values); + total_values = this->ReadValues(batch_size, 
values); for (int64_t i = 0; i < total_values; i++) { ::arrow::BitUtil::SetBit(valid_bits, valid_bits_offset + i); } @@ -515,7 +684,7 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( *levels_read = total_values; } - ConsumeBufferedValues(*levels_read); + this->ConsumeBufferedValues(*levels_read); return total_values; } @@ -525,29 +694,27 @@ int64_t TypedColumnReaderImpl::Skip(int64_t num_rows_to_skip) { while (HasNext() && rows_to_skip > 0) { // If the number of rows to skip is more than the number of undecoded values, skip the // Page. - if (rows_to_skip > (num_buffered_values_ - num_decoded_values_)) { - rows_to_skip -= num_buffered_values_ - num_decoded_values_; - num_decoded_values_ = num_buffered_values_; + if (rows_to_skip > (this->num_buffered_values_ - this->num_decoded_values_)) { + rows_to_skip -= this->num_buffered_values_ - this->num_decoded_values_; + this->num_decoded_values_ = this->num_buffered_values_; } else { // We need to read this Page // Jump to the right offset in the Page int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint int64_t values_read = 0; - std::shared_ptr vals = AllocateBuffer( + // This will be enough scratch space to accommodate 16-bit levels or any + // value type + std::shared_ptr scratch = AllocateBuffer( this->pool_, batch_size * type_traits::value_byte_size); - std::shared_ptr def_levels = - AllocateBuffer(this->pool_, batch_size * sizeof(int16_t)); - - std::shared_ptr rep_levels = - AllocateBuffer(this->pool_, batch_size * sizeof(int16_t)); do { batch_size = std::min(batch_size, rows_to_skip); - values_read = ReadBatch(static_cast(batch_size), - reinterpret_cast(def_levels->mutable_data()), - reinterpret_cast(rep_levels->mutable_data()), - reinterpret_cast(vals->mutable_data()), &values_read); + values_read = + ReadBatch(static_cast(batch_size), + reinterpret_cast(scratch->mutable_data()), + reinterpret_cast(scratch->mutable_data()), + reinterpret_cast(scratch->mutable_data()), &values_read); rows_to_skip -= values_read; } while (values_read > 0 && rows_to_skip > 0); } @@ -555,144 +722,6 @@ int64_t TypedColumnReaderImpl::Skip(int64_t num_rows_to_skip) { return num_rows_to_skip - rows_to_skip; } -template -void TypedColumnReaderImpl::ConfigureDictionary(const DictionaryPage* page) { - int encoding = static_cast(page->encoding()); - if (page->encoding() == Encoding::PLAIN_DICTIONARY || - page->encoding() == Encoding::PLAIN) { - encoding = static_cast(Encoding::RLE_DICTIONARY); - } - - auto it = decoders_.find(encoding); - if (it != decoders_.end()) { - throw ParquetException("Column cannot have more than one dictionary."); - } - - if (page->encoding() == Encoding::PLAIN_DICTIONARY || - page->encoding() == Encoding::PLAIN) { - auto dictionary = MakeTypedDecoder(Encoding::PLAIN, descr_); - dictionary->SetData(page->num_values(), page->data(), page->size()); - - // The dictionary is fully decoded during SetData, so the - // DictionaryPage buffer is no longer required after this step - // - // TODO(wesm): investigate whether this all-or-nothing decoding of the - // dictionary makes sense and whether performance can be improved - auto decoder = MakeDictDecoder(descr_, pool_); - decoder->SetDict(dictionary.get()); - decoders_[encoding] = std::move(decoder); - } else { - ParquetException::NYI("only plain dictionary encoding has been implemented"); - } - - current_decoder_ = decoders_[encoding].get(); -} - -// PLAIN_DICTIONARY is deprecated but used to be used as a dictionary index -// encoding. 
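The removed helper below, like its re-homed copy earlier in this file, folds the deprecated PLAIN_DICTIONARY encoding into RLE_DICTIONARY before any decoder lookup, so dictionary-indexed pages share one cache key. A trivial sketch of that normalization with an illustrative enum (not the library's Encoding type):

// Illustrative: fold the deprecated dictionary encoding into one key.
enum class Enc { PLAIN, PLAIN_DICTIONARY, RLE_DICTIONARY, RLE };

inline Enc NormalizeEncoding(Enc e) {
  // PLAIN_DICTIONARY is legacy; treat it exactly like RLE_DICTIONARY.
  return (e == Enc::PLAIN_DICTIONARY) ? Enc::RLE_DICTIONARY : e;
}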
-static bool IsDictionaryIndexEncoding(const Encoding::type& e) { - return e == Encoding::RLE_DICTIONARY || e == Encoding::PLAIN_DICTIONARY; -} - -template -bool TypedColumnReaderImpl::ReadNewPage() { - // Loop until we find the next data page. - const uint8_t* buffer; - - while (true) { - current_page_ = pager_->NextPage(); - if (!current_page_) { - // EOS - return false; - } - - if (current_page_->type() == PageType::DICTIONARY_PAGE) { - ConfigureDictionary(static_cast(current_page_.get())); - continue; - } else if (current_page_->type() == PageType::DATA_PAGE) { - const DataPageV1& page = static_cast(*current_page_); - - // Read a data page. - num_buffered_values_ = page.num_values(); - - // Have not decoded any values from the data page yet - num_decoded_values_ = 0; - - buffer = page.data(); - - // If the data page includes repetition and definition levels, we - // initialize the level decoder and subtract the encoded level bytes from - // the page size to determine the number of bytes in the encoded data. - int64_t data_size = page.size(); - - // Data page Layout: Repetition Levels - Definition Levels - encoded values. - // Levels are encoded as rle or bit-packed. - // Init repetition levels - if (descr_->max_repetition_level() > 0) { - int64_t rep_levels_bytes = repetition_level_decoder_.SetData( - page.repetition_level_encoding(), descr_->max_repetition_level(), - static_cast(num_buffered_values_), buffer); - buffer += rep_levels_bytes; - data_size -= rep_levels_bytes; - } - // TODO figure a way to set max_definition_level_ to 0 - // if the initial value is invalid - - // Init definition levels - if (descr_->max_definition_level() > 0) { - int64_t def_levels_bytes = definition_level_decoder_.SetData( - page.definition_level_encoding(), descr_->max_definition_level(), - static_cast(num_buffered_values_), buffer); - buffer += def_levels_bytes; - data_size -= def_levels_bytes; - } - - // Get a decoder object for this page or create a new decoder if this is the - // first page with this encoding. - Encoding::type encoding = page.encoding(); - - if (IsDictionaryIndexEncoding(encoding)) { - encoding = Encoding::RLE_DICTIONARY; - } - - auto it = decoders_.find(static_cast(encoding)); - if (it != decoders_.end()) { - if (encoding == Encoding::RLE_DICTIONARY) { - DCHECK(current_decoder_->encoding() == Encoding::RLE_DICTIONARY); - } - current_decoder_ = it->second.get(); - } else { - switch (encoding) { - case Encoding::PLAIN: { - auto decoder = MakeTypedDecoder(Encoding::PLAIN, descr_); - current_decoder_ = decoder.get(); - decoders_[static_cast(encoding)] = std::move(decoder); - break; - } - case Encoding::RLE_DICTIONARY: - throw ParquetException("Dictionary page must be before data page."); - - case Encoding::DELTA_BINARY_PACKED: - case Encoding::DELTA_LENGTH_BYTE_ARRAY: - case Encoding::DELTA_BYTE_ARRAY: - ParquetException::NYI("Unsupported encoding"); - - default: - throw ParquetException("Unknown encoding type."); - } - } - current_decoder_->SetData(static_cast(num_buffered_values_), buffer, - static_cast(data_size)); - return true; - } else { - // We don't know what this page type is. We're allowed to skip non-data - // pages. 
- continue; - } - } - return true; -} - // ---------------------------------------------------------------------- // Dynamic column reader constructor @@ -731,4 +760,566 @@ std::shared_ptr ColumnReader::Make(const ColumnDescriptor* descr, return std::shared_ptr(nullptr); } +// ---------------------------------------------------------------------- +// RecordReader + +namespace internal { + +// The minimum number of repetition/definition levels to decode at a time, for +// better vectorized performance when doing many smaller record reads +constexpr int64_t kMinLevelBatchSize = 1024; + +template +class TypedRecordReader : public ColumnReaderImplBase, public RecordReader { + public: + using T = typename DType::c_type; + using BASE = ColumnReaderImplBase; + TypedRecordReader(const ColumnDescriptor* descr, MemoryPool* pool) : BASE(descr, pool) { + nullable_values_ = internal::HasSpacedValues(descr); + at_record_start_ = true; + records_read_ = 0; + values_written_ = 0; + values_capacity_ = 0; + null_count_ = 0; + levels_written_ = 0; + levels_position_ = 0; + levels_capacity_ = 0; + uses_values_ = !(descr->physical_type() == Type::BYTE_ARRAY); + + if (uses_values_) { + values_ = AllocateBuffer(pool); + } + valid_bits_ = AllocateBuffer(pool); + def_levels_ = AllocateBuffer(pool); + rep_levels_ = AllocateBuffer(pool); + Reset(); + } + + int64_t available_values_current_page() const { + return this->num_buffered_values_ - this->num_decoded_values_; + } + + int64_t ReadRecords(int64_t num_records) override { + // Delimit records, then read values at the end + int64_t records_read = 0; + + if (levels_position_ < levels_written_) { + records_read += ReadRecordData(num_records); + } + + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + + // If we are in the middle of a record, we continue until reaching the + // desired number of records or the end of the current record if we've found + // enough records + while (!at_record_start_ || records_read < num_records) { + // Is there more data to read in this row group? + if (!this->HasNextInternal()) { + if (!at_record_start_) { + // We ended the row group while inside a record that we haven't seen + // the end of yet. 
So increment the record count for the last record in + // the row group + ++records_read; + at_record_start_ = true; + } + break; + } + + /// We perform multiple batch reads until we either exhaust the row group + /// or observe the desired number of records + int64_t batch_size = std::min(level_batch_size, available_values_current_page()); + + // No more data in column + if (batch_size == 0) { + break; + } + + if (this->max_def_level_ > 0) { + ReserveLevels(batch_size); + + int16_t* def_levels = this->def_levels() + levels_written_; + int16_t* rep_levels = this->rep_levels() + levels_written_; + + // Not present for non-repeated fields + int64_t levels_read = 0; + if (this->max_rep_level_ > 0) { + levels_read = this->ReadDefinitionLevels(batch_size, def_levels); + if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { + throw ParquetException("Number of decoded rep / def levels did not match"); + } + } else if (this->max_def_level_ > 0) { + levels_read = this->ReadDefinitionLevels(batch_size, def_levels); + } + + // Exhausted column chunk + if (levels_read == 0) { + break; + } + + levels_written_ += levels_read; + records_read += ReadRecordData(num_records - records_read); + } else { + // No repetition or definition levels + batch_size = std::min(num_records - records_read, batch_size); + records_read += ReadRecordData(batch_size); + } + } + + return records_read; + } + + // We may outwardly have the appearance of having exhausted a column chunk + // when in fact we are in the middle of processing the last batch + bool has_values_to_process() const { return levels_position_ < levels_written_; } + + std::shared_ptr ReleaseValues() override { + if (uses_values_) { + auto result = values_; + values_ = AllocateBuffer(this->pool_); + return result; + } else { + return nullptr; + } + } + + std::shared_ptr ReleaseIsValid() override { + auto result = valid_bits_; + valid_bits_ = AllocateBuffer(this->pool_); + return result; + } + + // Process written repetition/definition levels to reach the end of + // records. Process no more levels than necessary to delimit the indicated + // number of logical records. Updates internal state of RecordReader + // + // \return Number of records delimited + int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) { + int64_t values_to_read = 0; + int64_t records_read = 0; + + const int16_t* def_levels = this->def_levels() + levels_position_; + const int16_t* rep_levels = this->rep_levels() + levels_position_; + + DCHECK_GT(this->max_rep_level_, 0); + + // Count logical records and number of values to read + while (levels_position_ < levels_written_) { + if (*rep_levels++ == 0) { + // If at_record_start_ is true, we are seeing the start of a record + // for the second time, such as after repeated calls to + // DelimitRecords. In this case we must continue until we find + // another record start or exhausting the ColumnChunk + if (!at_record_start_) { + // We've reached the end of a record; increment the record count. + ++records_read; + if (records_read == num_records) { + // We've found the number of records we were looking for. 
Set + // at_record_start_ to true and break + at_record_start_ = true; + break; + } + } + } + + // We have decided to consume the level at this position; therefore we + // must advance until we find another record boundary + at_record_start_ = false; + + if (*def_levels++ == this->max_def_level_) { + ++values_to_read; + } + ++levels_position_; + } + *values_seen = values_to_read; + return records_read; + } + + void Reserve(int64_t capacity) override { + ReserveLevels(capacity); + ReserveValues(capacity); + } + + void ReserveLevels(int64_t capacity) { + if (this->max_def_level_ > 0 && (levels_written_ + capacity > levels_capacity_)) { + int64_t new_levels_capacity = BitUtil::NextPower2(levels_capacity_ + 1); + while (levels_written_ + capacity > new_levels_capacity) { + new_levels_capacity = BitUtil::NextPower2(new_levels_capacity + 1); + } + PARQUET_THROW_NOT_OK( + def_levels_->Resize(new_levels_capacity * sizeof(int16_t), false)); + if (this->max_rep_level_ > 0) { + PARQUET_THROW_NOT_OK( + rep_levels_->Resize(new_levels_capacity * sizeof(int16_t), false)); + } + levels_capacity_ = new_levels_capacity; + } + } + + void ReserveValues(int64_t capacity) { + if (values_written_ + capacity > values_capacity_) { + int64_t new_values_capacity = BitUtil::NextPower2(values_capacity_ + 1); + while (values_written_ + capacity > new_values_capacity) { + new_values_capacity = BitUtil::NextPower2(new_values_capacity + 1); + } + + int type_size = GetTypeByteSize(this->descr_->physical_type()); + + // XXX(wesm): A hack to avoid memory allocation when reading directly + // into builder classes + if (uses_values_) { + PARQUET_THROW_NOT_OK(values_->Resize(new_values_capacity * type_size, false)); + } + + values_capacity_ = new_values_capacity; + } + if (nullable_values_) { + int64_t valid_bytes_new = BitUtil::BytesForBits(values_capacity_); + if (valid_bits_->size() < valid_bytes_new) { + int64_t valid_bytes_old = BitUtil::BytesForBits(values_written_); + PARQUET_THROW_NOT_OK(valid_bits_->Resize(valid_bytes_new, false)); + + // Avoid valgrind warnings + memset(valid_bits_->mutable_data() + valid_bytes_old, 0, + valid_bytes_new - valid_bytes_old); + } + } + } + + void Reset() override { + ResetValues(); + + if (levels_written_ > 0) { + const int64_t levels_remaining = levels_written_ - levels_position_; + // Shift remaining levels to beginning of buffer and trim to only the number + // of decoded levels remaining + int16_t* def_data = def_levels(); + int16_t* rep_data = rep_levels(); + + std::copy(def_data + levels_position_, def_data + levels_written_, def_data); + PARQUET_THROW_NOT_OK( + def_levels_->Resize(levels_remaining * sizeof(int16_t), false)); + + if (this->max_rep_level_ > 0) { + std::copy(rep_data + levels_position_, rep_data + levels_written_, rep_data); + PARQUET_THROW_NOT_OK( + rep_levels_->Resize(levels_remaining * sizeof(int16_t), false)); + } + + levels_written_ -= levels_position_; + levels_position_ = 0; + levels_capacity_ = levels_remaining; + } + + records_read_ = 0; + + // Call Finish on the binary builders to reset them + } + + void SetPageReader(std::unique_ptr reader) override { + at_record_start_ = true; + this->pager_ = std::move(reader); + ResetDecoders(); + } + + bool HasMoreData() const override { return this->pager_ != nullptr; } + + // Dictionary decoders must be reset when advancing row groups + void ResetDecoders() { this->decoders_.clear(); } + + virtual void ReadValuesSpaced(int64_t values_with_nulls, int64_t null_count) { + uint8_t* valid_bits = 
valid_bits_->mutable_data(); + const int64_t valid_bits_offset = values_written_; + + int64_t num_decoded = this->current_decoder_->DecodeSpaced( + ValuesHead(), static_cast(values_with_nulls), + static_cast(null_count), valid_bits, valid_bits_offset); + DCHECK_EQ(num_decoded, values_with_nulls); + } + + virtual void ReadValuesDense(int64_t values_to_read) { + int64_t num_decoded = + this->current_decoder_->Decode(ValuesHead(), static_cast(values_to_read)); + DCHECK_EQ(num_decoded, values_to_read); + } + + // Return number of logical records read + int64_t ReadRecordData(int64_t num_records) { + // Conservative upper bound + const int64_t possible_num_values = + std::max(num_records, levels_written_ - levels_position_); + ReserveValues(possible_num_values); + + const int64_t start_levels_position = levels_position_; + + int64_t values_to_read = 0; + int64_t records_read = 0; + if (this->max_rep_level_ > 0) { + records_read = DelimitRecords(num_records, &values_to_read); + } else if (this->max_def_level_ > 0) { + // No repetition levels, skip delimiting logic. Each level represents a + // null or not null entry + records_read = std::min(levels_written_ - levels_position_, num_records); + + // This is advanced by DelimitRecords, which we skipped + levels_position_ += records_read; + } else { + records_read = values_to_read = num_records; + } + + int64_t null_count = 0; + if (nullable_values_) { + int64_t values_with_nulls = 0; + internal::DefinitionLevelsToBitmap( + def_levels() + start_levels_position, levels_position_ - start_levels_position, + this->max_def_level_, this->max_rep_level_, &values_with_nulls, &null_count, + valid_bits_->mutable_data(), values_written_); + values_to_read = values_with_nulls - null_count; + ReadValuesSpaced(values_with_nulls, null_count); + this->ConsumeBufferedValues(levels_position_ - start_levels_position); + } else { + ReadValuesDense(values_to_read); + this->ConsumeBufferedValues(values_to_read); + } + // Total values, including null spaces, if any + values_written_ += values_to_read + null_count; + null_count_ += null_count; + + return records_read; + } + + void DebugPrintState() override { + const int16_t* def_levels = this->def_levels(); + const int16_t* rep_levels = this->rep_levels(); + const int64_t total_levels_read = levels_position_; + + const T* vals = reinterpret_cast(this->values()); + + std::cout << "def levels: "; + for (int64_t i = 0; i < total_levels_read; ++i) { + std::cout << def_levels[i] << " "; + } + std::cout << std::endl; + + std::cout << "rep levels: "; + for (int64_t i = 0; i < total_levels_read; ++i) { + std::cout << rep_levels[i] << " "; + } + std::cout << std::endl; + + std::cout << "values: "; + for (int64_t i = 0; i < this->values_written(); ++i) { + std::cout << vals[i] << " "; + } + std::cout << std::endl; + } + + std::vector> GetBuilderChunks() override { + throw ParquetException("GetChunks only implemented for binary types"); + } + + void ResetValues() { + if (values_written_ > 0) { + // Resize to 0, but do not shrink to fit + if (uses_values_) { + PARQUET_THROW_NOT_OK(values_->Resize(0, false)); + } + PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, false)); + values_written_ = 0; + values_capacity_ = 0; + null_count_ = 0; + } + } + + protected: + template + T* ValuesHead() { + return reinterpret_cast(values_->mutable_data()) + values_written_; + } +}; + +class FLBARecordReader : public TypedRecordReader { + public: + FLBARecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) + : TypedRecordReader(descr, 
pool), builder_(nullptr) { + DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY); + int byte_width = descr_->type_length(); + std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width); + builder_.reset(new ::arrow::FixedSizeBinaryBuilder(type, this->pool_)); + } + + ::arrow::ArrayVector GetBuilderChunks() override { + std::shared_ptr<::arrow::Array> chunk; + PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); + return ::arrow::ArrayVector({chunk}); + } + + void ReadValuesDense(int64_t values_to_read) override { + auto values = ValuesHead(); + int64_t num_decoded = + this->current_decoder_->Decode(values, static_cast(values_to_read)); + DCHECK_EQ(num_decoded, values_to_read); + + for (int64_t i = 0; i < num_decoded; i++) { + PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); + } + ResetValues(); + } + + void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { + uint8_t* valid_bits = valid_bits_->mutable_data(); + const int64_t valid_bits_offset = values_written_; + auto values = ValuesHead(); + + int64_t num_decoded = this->current_decoder_->DecodeSpaced( + values, static_cast(values_to_read), static_cast(null_count), + valid_bits, valid_bits_offset); + DCHECK_EQ(num_decoded, values_to_read); + + for (int64_t i = 0; i < num_decoded; i++) { + if (::arrow::BitUtil::GetBit(valid_bits, valid_bits_offset + i)) { + PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); + } else { + PARQUET_THROW_NOT_OK(builder_->AppendNull()); + } + } + ResetValues(); + } + + private: + std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_; +}; + +class ByteArrayChunkedRecordReader : public TypedRecordReader { + public: + using BuilderType = ::arrow::internal::ChunkedBinaryBuilder; + + ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool) + : TypedRecordReader(descr, pool), builder_(nullptr) { + // ARROW-4688(wesm): Using 2^31 - 1 chunks for now + constexpr int32_t kBinaryChunksize = 2147483647; + DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY); + if (descr_->converted_type() == ConvertedType::UTF8) { + builder_.reset( + new ::arrow::internal::ChunkedStringBuilder(kBinaryChunksize, this->pool_)); + } else { + builder_.reset( + new ::arrow::internal::ChunkedBinaryBuilder(kBinaryChunksize, this->pool_)); + } + } + + ::arrow::ArrayVector GetBuilderChunks() override { + ::arrow::ArrayVector chunks; + PARQUET_THROW_NOT_OK(builder_->Finish(&chunks)); + return chunks; + } + + void ReadValuesDense(int64_t values_to_read) override { + int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull( + static_cast(values_to_read), builder_.get()); + DCHECK_EQ(num_decoded, values_to_read); + ResetValues(); + } + + void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { + int64_t num_decoded = this->current_decoder_->DecodeArrow( + static_cast(values_to_read), static_cast(null_count), + valid_bits_->mutable_data(), values_written_, builder_.get()); + DCHECK_EQ(num_decoded, values_to_read); + ResetValues(); + } + + private: + std::unique_ptr builder_; +}; + +template +class ByteArrayDictionaryRecordReader : public TypedRecordReader { + public: + ByteArrayDictionaryRecordReader(const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool) + : TypedRecordReader(descr, pool), builder_(new BuilderType(pool)) {} + + ::arrow::ArrayVector GetBuilderChunks() override { + std::shared_ptr<::arrow::Array> chunk; + PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); + return ::arrow::ArrayVector({chunk}); + } + + void 
ReadValuesDense(int64_t values_to_read) override { + int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull( + static_cast<int>(values_to_read), builder_.get()); + DCHECK_EQ(num_decoded, values_to_read); + ResetValues(); + } + + void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { + int64_t num_decoded = this->current_decoder_->DecodeArrow( + static_cast<int>(values_to_read), static_cast<int>(null_count), + valid_bits_->mutable_data(), values_written_, builder_.get()); + DCHECK_EQ(num_decoded, values_to_read); + ResetValues(); + } + + private: + std::unique_ptr<BuilderType> builder_; +}; + +// TODO(wesm): Implement these to some satisfaction +template <> +void TypedRecordReader<Int96Type>::DebugPrintState() {} + +template <> +void TypedRecordReader<ByteArrayType>::DebugPrintState() {} + +template <> +void TypedRecordReader<FLBAType>::DebugPrintState() {} + +std::shared_ptr<RecordReader> MakeByteArrayRecordReader(const ColumnDescriptor* descr, + arrow::MemoryPool* pool, + bool read_dictionary) { + if (read_dictionary) { + if (descr->converted_type() == ConvertedType::UTF8) { + using Builder = ::arrow::StringDictionaryBuilder; + return std::make_shared<ByteArrayDictionaryRecordReader<Builder>>(descr, pool); + } else { + using Builder = ::arrow::BinaryDictionaryBuilder; + return std::make_shared<ByteArrayDictionaryRecordReader<Builder>>(descr, pool); + } + } else { + return std::make_shared<ByteArrayChunkedRecordReader>(descr, pool); + } +} + +std::shared_ptr<RecordReader> RecordReader::Make(const ColumnDescriptor* descr, + MemoryPool* pool, + const bool read_dictionary) { + switch (descr->physical_type()) { + case Type::BOOLEAN: + return std::make_shared<TypedRecordReader<BooleanType>>(descr, pool); + case Type::INT32: + return std::make_shared<TypedRecordReader<Int32Type>>(descr, pool); + case Type::INT64: + return std::make_shared<TypedRecordReader<Int64Type>>(descr, pool); + case Type::INT96: + return std::make_shared<TypedRecordReader<Int96Type>>(descr, pool); + case Type::FLOAT: + return std::make_shared<TypedRecordReader<FloatType>>(descr, pool); + case Type::DOUBLE: + return std::make_shared<TypedRecordReader<DoubleType>>(descr, pool); + case Type::BYTE_ARRAY: + return MakeByteArrayRecordReader(descr, pool, read_dictionary); + case Type::FIXED_LEN_BYTE_ARRAY: + return std::make_shared<FLBARecordReader>(descr, pool); + default: { + // PARQUET-1481: This can occur if the file is corrupt + std::stringstream ss; + ss << "Invalid physical column type: " << static_cast<int>(descr->physical_type()); + throw ParquetException(ss.str()); + } + } + // Unreachable code, but suppress compiler warning + return nullptr; +} + +} // namespace internal } // namespace parquet diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index e7d6afbb467..461cf726733 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -22,6 +22,7 @@ #include #include #include +#include <vector> #include "parquet/encoding.h" #include "parquet/exception.h" @@ -31,6 +32,8 @@ namespace arrow { +class Array; + namespace BitUtil { class BitReader; } // namespace BitUtil @@ -179,6 +182,106 @@ class TypedColumnReader : public ColumnReader { namespace internal { +/// \brief Stateful column reader that delimits semantic records for both flat +/// and nested columns +/// +/// \note API EXPERIMENTAL +/// \since 1.3.0 +class RecordReader { + public: + static std::shared_ptr<RecordReader> Make( + const ColumnDescriptor* descr, + ::arrow::MemoryPool* pool = ::arrow::default_memory_pool(), + const bool read_dictionary = false); + + virtual ~RecordReader() = default; + + /// \brief Attempt to read indicated number of records from column chunk + /// \return number of records read + virtual int64_t ReadRecords(int64_t num_records) = 0; + + /// \brief Pre-allocate space for data.
Results in better flat read performance + virtual void Reserve(int64_t num_values) = 0; + + /// \brief Clear consumed values and repetition/definition levels as the + /// result of calling ReadRecords + virtual void Reset() = 0; + + /// \brief Transfer filled values buffer to caller. A new one will be + /// allocated in subsequent ReadRecords calls + virtual std::shared_ptr ReleaseValues() = 0; + + /// \brief Transfer filled validity bitmap buffer to caller. A new one will + /// be allocated in subsequent ReadRecords calls + virtual std::shared_ptr ReleaseIsValid() = 0; + + /// \brief Return true if the record reader has more internal data yet to + /// process + virtual bool HasMoreData() const = 0; + + /// \brief Advance record reader to the next row group + /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader + virtual void SetPageReader(std::unique_ptr reader) = 0; + + virtual void DebugPrintState() = 0; + + // For BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY types that may have chunked output + virtual std::vector> GetBuilderChunks() = 0; + + /// \brief Decoded definition levels + int16_t* def_levels() const { + return reinterpret_cast(def_levels_->mutable_data()); + } + + /// \brief Decoded repetition levels + int16_t* rep_levels() const { + return reinterpret_cast(rep_levels_->mutable_data()); + } + + /// \brief Decoded values, including nulls, if any + uint8_t* values() const { return values_->mutable_data(); } + + /// \brief Number of values written including nulls (if any) + int64_t values_written() const { return values_written_; } + + /// \brief Number of definition / repetition levels (from those that have + /// been decoded) that have been consumed inside the reader. + int64_t levels_position() const { return levels_position_; } + + /// \brief Number of definition / repetition levels that have been written + /// internally in the reader + int64_t levels_written() const { return levels_written_; } + + /// \brief Number of nulls in the leaf + int64_t null_count() const { return null_count_; } + + /// \brief True if the leaf values are nullable + bool nullable_values() const { return nullable_values_; } + + protected: + bool nullable_values_; + + bool at_record_start_; + int64_t records_read_; + + int64_t values_written_; + int64_t values_capacity_; + int64_t null_count_; + + int64_t levels_written_; + int64_t levels_position_; + int64_t levels_capacity_; + + std::shared_ptr<::arrow::ResizableBuffer> values_; + // In the case of false, don't allocate the values buffer (when we directly read into + // builder classes). + bool uses_values_; + + std::shared_ptr<::arrow::ResizableBuffer> valid_bits_; + std::shared_ptr<::arrow::ResizableBuffer> def_levels_; + std::shared_ptr<::arrow::ResizableBuffer> rep_levels_; +}; + static inline void DefinitionLevelsToBitmap( const int16_t* def_levels, int64_t num_def_levels, const int16_t max_definition_level, const int16_t max_repetition_level, int64_t* values_read, int64_t* null_count, @@ -186,7 +289,7 @@ static inline void DefinitionLevelsToBitmap( // We assume here that valid_bits is large enough to accommodate the // additional definition levels and the ones that have already been written ::arrow::internal::BitmapWriter valid_bits_writer(valid_bits, valid_bits_offset, - valid_bits_offset + num_def_levels); + num_def_levels); // TODO(itaiin): As an interim solution we are splitting the code path here // between repeated+flat column reads, and non-repeated+nested reads. 
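
The last hunk above also corrects the `BitmapWriter` constructed in `DefinitionLevelsToBitmap`: the writer is already positioned at `valid_bits_offset`, so its length argument must be `num_def_levels` (the count of new levels) rather than `valid_bits_offset + num_def_levels`, which overstated the writable range. For intuition, here is a standalone sketch (an illustration under simplifying assumptions, not the library code) of what this mapping computes in the flat case, `max_rep_level == 0`, where a level equal to `max_def_level` marks a present value and anything lower marks a null:

```cpp
#include <cstdint>
#include <vector>

// Sketch: translate definition levels of a flat, nullable column into a
// validity vector. The real helper writes a packed bitmap starting at a bit
// offset and has extra branches for repeated and nested columns.
void DefLevelsToValidity(const int16_t* def_levels, int64_t num_levels,
                         int16_t max_def_level, std::vector<bool>* valid,
                         int64_t* null_count) {
  for (int64_t i = 0; i < num_levels; ++i) {
    const bool is_valid = (def_levels[i] == max_def_level);
    valid->push_back(is_valid);
    if (!is_valid) ++(*null_count);
  }
}
```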
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 77f86e36f9b..304724b6b52 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -29,6 +29,7 @@ #include "arrow/util/logging.h" #include "arrow/util/rle-encoding.h" #include "arrow/util/string_view.h" +#include "arrow/util/ubsan.h" #include "parquet/exception.h" #include "parquet/platform.h" @@ -609,7 +610,7 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size, int nu int bytes_decoded = 0; int increment; for (int i = 0; i < num_values; ++i) { - uint32_t len = out[i].len = *reinterpret_cast(data); + uint32_t len = out[i].len = arrow::util::SafeLoadAs(data); increment = static_cast(sizeof(uint32_t) + len); if (data_size < increment) ParquetException::EofException(); out[i].ptr = data + sizeof(uint32_t); @@ -719,7 +720,7 @@ class PlainByteArrayDecoder : public PlainDecoder, int bytes_decoded = 0; while (i < num_values) { if (bit_reader.IsSet()) { - uint32_t len = *reinterpret_cast(data); + uint32_t len = arrow::util::SafeLoadAs(data); increment = static_cast(sizeof(uint32_t) + len); if (data_size < increment) { ParquetException::EofException(); @@ -752,7 +753,7 @@ class PlainByteArrayDecoder : public PlainDecoder, int bytes_decoded = 0; while (i < num_values) { - uint32_t len = *reinterpret_cast(data); + uint32_t len = arrow::util::SafeLoadAs(data); int increment = static_cast(sizeof(uint32_t) + len); if (data_size < increment) ParquetException::EofException(); builder->Append(data + sizeof(uint32_t), len); @@ -1103,7 +1104,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, virtual void SetData(int num_values, const uint8_t* data, int len) { num_values_ = num_values; if (len == 0) return; - int total_lengths_len = *reinterpret_cast(data); + int total_lengths_len = arrow::util::SafeLoadAs(data); data += 4; this->len_decoder_.SetData(num_values, data, total_lengths_len); data_ = data + total_lengths_len; @@ -1145,7 +1146,7 @@ class DeltaByteArrayDecoder : public DecoderImpl, virtual void SetData(int num_values, const uint8_t* data, int len) { num_values_ = num_values; if (len == 0) return; - int prefix_len_length = *reinterpret_cast(data); + int prefix_len_length = arrow::util::SafeLoadAs(data); data += 4; len -= 4; prefix_len_decoder_.SetData(num_values, data, prefix_len_length); diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 959ea0dfb06..d0ca9ca809d 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -28,6 +28,7 @@ #include "arrow/io/file.h" #include "arrow/status.h" #include "arrow/util/logging.h" +#include "arrow/util/ubsan.h" #include "parquet/column_reader.h" #include "parquet/column_scanner.h" @@ -179,7 +180,7 @@ class SerializedFile : public ParquetFileReader::Contents { throw ParquetException("Invalid parquet file. 
Corrupt footer."); } - uint32_t metadata_len = *reinterpret_cast( + uint32_t metadata_len = arrow::util::SafeLoadAs( reinterpret_cast(footer_buffer->data()) + footer_read_size - kFooterSize); int64_t metadata_start = file_size - kFooterSize - metadata_len; diff --git a/cpp/src/parquet/schema-test.cc b/cpp/src/parquet/schema-test.cc index 4e11d1ff2d1..023a1b02a6a 100644 --- a/cpp/src/parquet/schema-test.cc +++ b/cpp/src/parquet/schema-test.cc @@ -1079,8 +1079,6 @@ TEST(TestLogicalTypeConstruction, NewTypeIncompatibility) { {LogicalType::Time(false, LogicalType::TimeUnit::MICROS), check_is_time}, {LogicalType::Time(false, LogicalType::TimeUnit::NANOS), check_is_time}, {LogicalType::Time(true, LogicalType::TimeUnit::NANOS), check_is_time}, - {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), check_is_timestamp}, - {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), check_is_timestamp}, {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), check_is_timestamp}, {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), check_is_timestamp}, }; @@ -1399,23 +1397,35 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { "Time(isAdjustedToUTC=false, timeUnit=nanoseconds)", R"({"Type": "Time", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"})"}, {LogicalType::Timestamp(true, LogicalType::TimeUnit::MILLIS), - "Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds)", - R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds"})"}, + "Timestamp(isAdjustedToUTC=true, timeUnit=milliseconds, " + "is_from_converted_type=false, force_set_converted_type=false)", + R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "milliseconds", )" + R"("is_from_converted_type": false, "force_set_converted_type": false})"}, {LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), - "Timestamp(isAdjustedToUTC=true, timeUnit=microseconds)", - R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds"})"}, + "Timestamp(isAdjustedToUTC=true, timeUnit=microseconds, " + "is_from_converted_type=false, force_set_converted_type=false)", + R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "microseconds", )" + R"("is_from_converted_type": false, "force_set_converted_type": false})"}, {LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), - "Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds)", - R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds"})"}, - {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), - "Timestamp(isAdjustedToUTC=false, timeUnit=milliseconds)", - R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds"})"}, + "Timestamp(isAdjustedToUTC=true, timeUnit=nanoseconds, " + "is_from_converted_type=false, force_set_converted_type=false)", + R"({"Type": "Timestamp", "isAdjustedToUTC": true, "timeUnit": "nanoseconds", )" + R"("is_from_converted_type": false, "force_set_converted_type": false})"}, + {LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, true, true), + "Timestamp(isAdjustedToUTC=false, timeUnit=milliseconds, " + "is_from_converted_type=true, force_set_converted_type=true)", + R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "milliseconds", )" + R"("is_from_converted_type": true, "force_set_converted_type": true})"}, {LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), - "Timestamp(isAdjustedToUTC=false, timeUnit=microseconds)", - R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": 
"microseconds"})"}, + "Timestamp(isAdjustedToUTC=false, timeUnit=microseconds, " + "is_from_converted_type=false, force_set_converted_type=false)", + R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "microseconds", )" + R"("is_from_converted_type": false, "force_set_converted_type": false})"}, {LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), - "Timestamp(isAdjustedToUTC=false, timeUnit=nanoseconds)", - R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds"})"}, + "Timestamp(isAdjustedToUTC=false, timeUnit=nanoseconds, " + "is_from_converted_type=false, force_set_converted_type=false)", + R"({"Type": "Timestamp", "isAdjustedToUTC": false, "timeUnit": "nanoseconds", )" + R"("is_from_converted_type": false, "force_set_converted_type": false})"}, {LogicalType::Interval(), "Interval", R"({"Type": "Interval"})"}, {LogicalType::Int(8, false), "Int(bitWidth=8, isSigned=false)", R"({"Type": "Int", "bitWidth": 8, "isSigned": false})"}, @@ -1669,6 +1679,16 @@ struct SchemaElementConstructionArguments { std::function check_logicalType; }; +struct LegacySchemaElementConstructionArguments { + std::string name; + Type::type physical_type; + int physical_length; + bool expect_converted_type; + ConvertedType::type converted_type; + bool expect_logicalType; + std::function check_logicalType; +}; + class TestSchemaElementConstruction : public ::testing::Test { public: TestSchemaElementConstruction* Reconstruct( @@ -1688,6 +1708,23 @@ class TestSchemaElementConstruction : public ::testing::Test { return this; } + TestSchemaElementConstruction* LegacyReconstruct( + const LegacySchemaElementConstructionArguments& c) { + // Make node, create serializable Thrift object from it ... + node_ = PrimitiveNode::Make(c.name, Repetition::REQUIRED, c.physical_type, + c.converted_type, c.physical_length); + element_.reset(new format::SchemaElement); + node_->ToParquet(element_.get()); + + // ... then set aside some values for later inspection. 
+ name_ = c.name; + expect_converted_type_ = c.expect_converted_type; + converted_type_ = c.converted_type; + expect_logicalType_ = c.expect_logicalType; + check_logicalType_ = c.check_logicalType; + return this; + } + void Inspect() { ASSERT_EQ(element_->name, name_); if (expect_converted_type_) { @@ -1773,6 +1810,17 @@ TEST_F(TestSchemaElementConstruction, SimpleCases) { for (const SchemaElementConstructionArguments& c : cases) { this->Reconstruct(c)->Inspect(); } + + std::vector legacy_cases = { + {"timestamp_ms", Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, false, + check_nothing}, + {"timestamp_us", Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, false, + check_nothing}, + }; + + for (const LegacySchemaElementConstructionArguments& c : legacy_cases) { + this->LegacyReconstruct(c)->Inspect(); + } } class TestDecimalSchemaElementConstruction : public TestSchemaElementConstruction { @@ -1914,10 +1962,20 @@ TEST_F(TestTemporalSchemaElementConstruction, TemporalCases) { Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP}, {"timestamp_F_ms", LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS), Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP}, + {"timestamp_F_ms_force", + LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true), + Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP}, {"timestamp_T_us", LogicalType::Timestamp(true, LogicalType::TimeUnit::MICROS), Type::INT64, -1, true, ConvertedType::TIMESTAMP_MICROS, true, check_TIMESTAMP}, {"timestamp_F_us", LogicalType::Timestamp(false, LogicalType::TimeUnit::MICROS), Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP}, + {"timestamp_F_us_force", + LogicalType::Timestamp(false, LogicalType::TimeUnit::MILLIS, + /*is_from_converted_type=*/false, + /*force_set_converted_type=*/true), + Type::INT64, -1, true, ConvertedType::TIMESTAMP_MILLIS, true, check_TIMESTAMP}, {"timestamp_T_ns", LogicalType::Timestamp(true, LogicalType::TimeUnit::NANOS), Type::INT64, -1, false, ConvertedType::NA, true, check_TIMESTAMP}, {"timestamp_F_ns", LogicalType::Timestamp(false, LogicalType::TimeUnit::NANOS), diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index 644e28ff38c..f89170d40b4 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -380,9 +380,13 @@ std::shared_ptr LogicalType::FromConvertedType( case ConvertedType::TIME_MICROS: return TimeLogicalType::Make(true, LogicalType::TimeUnit::MICROS); case ConvertedType::TIMESTAMP_MILLIS: - return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS); + return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MILLIS, + /*is_from_converted_type=*/true, + /*force_set_converted_type=*/false); case ConvertedType::TIMESTAMP_MICROS: - return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS); + return TimestampLogicalType::Make(true, LogicalType::TimeUnit::MICROS, + /*is_from_converted_type=*/true, + /*force_set_converted_type=*/false); case ConvertedType::INTERVAL: return IntervalLogicalType::Make(); case ConvertedType::INT_8: @@ -496,9 +500,11 @@ std::shared_ptr LogicalType::Time( } std::shared_ptr LogicalType::Timestamp( - bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) { + bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit, + bool is_from_converted_type, bool force_set_converted_type) { DCHECK(time_unit != LogicalType::TimeUnit::UNKNOWN); - 
return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit); + return TimestampLogicalType::Make(is_adjusted_to_utc, time_unit, is_from_converted_type, + force_set_converted_type); } std::shared_ptr LogicalType::Interval() { @@ -552,6 +558,10 @@ class LogicalType::Impl { virtual std::string ToString() const = 0; + virtual bool is_serialized() const { + return !(type_ == LogicalType::Type::NONE || type_ == LogicalType::Type::UNKNOWN); + } + virtual std::string ToJSON() const { std::stringstream json; json << R"({"Type": ")" << ToString() << R"("})"; @@ -676,10 +686,7 @@ bool LogicalType::is_nested() const { (impl_->type() == LogicalType::Type::MAP); } bool LogicalType::is_nonnested() const { return !is_nested(); } -bool LogicalType::is_serialized() const { - return !((impl_->type() == LogicalType::Type::NONE) || - (impl_->type() == LogicalType::Type::UNKNOWN)); -} +bool LogicalType::is_serialized() const { return impl_->is_serialized(); } // LogicalTypeImpl intermediate "compatibility" classes @@ -1192,6 +1199,7 @@ class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible, public: friend class TimestampLogicalType; + bool is_serialized() const override; bool is_compatible(ConvertedType::type converted_type, schema::DecimalMetadata converted_decimal_metadata) const override; ConvertedType::type ToConvertedType( @@ -1204,25 +1212,47 @@ class LogicalType::Impl::Timestamp final : public LogicalType::Impl::Compatible, bool is_adjusted_to_utc() const { return adjusted_; } LogicalType::TimeUnit::unit time_unit() const { return unit_; } + bool is_from_converted_type() const { return is_from_converted_type_; } + bool force_set_converted_type() const { return force_set_converted_type_; } + private: - Timestamp(bool a, LogicalType::TimeUnit::unit u) + Timestamp(bool adjusted, LogicalType::TimeUnit::unit unit, bool is_from_converted_type, + bool force_set_converted_type) : LogicalType::Impl(LogicalType::Type::TIMESTAMP, SortOrder::SIGNED), LogicalType::Impl::SimpleApplicable(parquet::Type::INT64), - adjusted_(a), - unit_(u) {} + adjusted_(adjusted), + unit_(unit), + is_from_converted_type_(is_from_converted_type), + force_set_converted_type_(force_set_converted_type) {} bool adjusted_ = false; LogicalType::TimeUnit::unit unit_; + bool is_from_converted_type_ = false; + bool force_set_converted_type_ = false; }; +bool LogicalType::Impl::Timestamp::is_serialized() const { + return !is_from_converted_type_; +} + bool LogicalType::Impl::Timestamp::is_compatible( ConvertedType::type converted_type, schema::DecimalMetadata converted_decimal_metadata) const { if (converted_decimal_metadata.isset) { return false; - } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MILLIS) { - return converted_type == ConvertedType::TIMESTAMP_MILLIS; - } else if (adjusted_ && unit_ == LogicalType::TimeUnit::MICROS) { - return converted_type == ConvertedType::TIMESTAMP_MICROS; + } else if (unit_ == LogicalType::TimeUnit::MILLIS) { + if (adjusted_ || force_set_converted_type_) { + return converted_type == ConvertedType::TIMESTAMP_MILLIS; + } else { + return (converted_type == ConvertedType::NONE) || + (converted_type == ConvertedType::NA); + } + } else if (unit_ == LogicalType::TimeUnit::MICROS) { + if (adjusted_ || force_set_converted_type_) { + return converted_type == ConvertedType::TIMESTAMP_MICROS; + } else { + return (converted_type == ConvertedType::NONE) || + (converted_type == ConvertedType::NA); + } } else { return (converted_type == ConvertedType::NONE) || (converted_type == 
ConvertedType::NA); @@ -1232,7 +1262,7 @@ bool LogicalType::Impl::Timestamp::is_compatible( ConvertedType::type LogicalType::Impl::Timestamp::ToConvertedType( schema::DecimalMetadata* out_decimal_metadata) const { reset_decimal_metadata(out_decimal_metadata); - if (adjusted_) { + if (adjusted_ || force_set_converted_type_) { if (unit_ == LogicalType::TimeUnit::MILLIS) { return ConvertedType::TIMESTAMP_MILLIS; } else if (unit_ == LogicalType::TimeUnit::MICROS) { @@ -1245,14 +1275,18 @@ ConvertedType::type LogicalType::Impl::Timestamp::ToConvertedType( std::string LogicalType::Impl::Timestamp::ToString() const { std::stringstream type; type << "Timestamp(isAdjustedToUTC=" << std::boolalpha << adjusted_ - << ", timeUnit=" << time_unit_string(unit_) << ")"; + << ", timeUnit=" << time_unit_string(unit_) + << ", is_from_converted_type=" << is_from_converted_type_ + << ", force_set_converted_type=" << force_set_converted_type_ << ")"; return type.str(); } std::string LogicalType::Impl::Timestamp::ToJSON() const { std::stringstream json; json << R"({"Type": "Timestamp", "isAdjustedToUTC": )" << std::boolalpha << adjusted_ - << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"("})"; + << R"(, "timeUnit": ")" << time_unit_string(unit_) << R"(")" + << R"(, "is_from_converted_type": )" << is_from_converted_type_ + << R"(, "force_set_converted_type": )" << force_set_converted_type_ << R"(})"; return json.str(); } @@ -1288,13 +1322,14 @@ bool LogicalType::Impl::Timestamp::Equals(const LogicalType& other) const { } std::shared_ptr TimestampLogicalType::Make( - bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit) { + bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit, + bool is_from_converted_type, bool force_set_converted_type) { if (time_unit == LogicalType::TimeUnit::MILLIS || time_unit == LogicalType::TimeUnit::MICROS || time_unit == LogicalType::TimeUnit::NANOS) { auto* logical_type = new TimestampLogicalType(); - logical_type->impl_.reset( - new LogicalType::Impl::Timestamp(is_adjusted_to_utc, time_unit)); + logical_type->impl_.reset(new LogicalType::Impl::Timestamp( + is_adjusted_to_utc, time_unit, is_from_converted_type, force_set_converted_type)); return std::shared_ptr(logical_type); } else { throw ParquetException( @@ -1310,6 +1345,16 @@ LogicalType::TimeUnit::unit TimestampLogicalType::time_unit() const { return (dynamic_cast(*impl_)).time_unit(); } +bool TimestampLogicalType::is_from_converted_type() const { + return (dynamic_cast(*impl_)) + .is_from_converted_type(); +} + +bool TimestampLogicalType::force_set_converted_type() const { + return (dynamic_cast(*impl_)) + .force_set_converted_type(); +} + class LogicalType::Impl::Interval final : public LogicalType::Impl::SimpleCompatible, public LogicalType::Impl::TypeLengthApplicable { public: diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 0bfaf99b381..dfa056ebe56 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -182,8 +182,20 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr Date(); static std::shared_ptr Time(bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit); + + /// \brief Create a Timestamp logical type + /// \param[in] is_adjusted_to_utc set true if the data is UTC-normalized + /// \param[in] time_unit the resolution of the timestamp + /// \param[in] is_from_converted_type if true, the timestamp was generated + /// by translating a legacy converted type of TIMESTAMP_MILLIS or + /// TIMESTAMP_MICROS. Default is false. 
+ /// \param[in] force_set_converted_type if true, always set the + /// legacy ConvertedType TIMESTAMP_MICROS and TIMESTAMP_MILLIS + /// metadata. Default is false static std::shared_ptr Timestamp( - bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit); + bool is_adjusted_to_utc, LogicalType::TimeUnit::unit time_unit, + bool is_from_converted_type = false, bool force_set_converted_type = false); + static std::shared_ptr Interval(); static std::shared_ptr Int(int bit_width, bool is_signed); static std::shared_ptr Null(); @@ -337,10 +349,19 @@ class PARQUET_EXPORT TimeLogicalType : public LogicalType { class PARQUET_EXPORT TimestampLogicalType : public LogicalType { public: static std::shared_ptr Make(bool is_adjusted_to_utc, - LogicalType::TimeUnit::unit time_unit); + LogicalType::TimeUnit::unit time_unit, + bool is_from_converted_type = false, + bool force_set_converted_type = false); bool is_adjusted_to_utc() const; LogicalType::TimeUnit::unit time_unit() const; + /// \brief If true, will not set LogicalType in Thrift metadata + bool is_from_converted_type() const; + + /// \brief If true, will set ConvertedType for micros and millis + /// resolution in legacy ConvertedType Thrift metadata + bool force_set_converted_type() const; + private: TimestampLogicalType() = default; }; diff --git a/cpp/src/plasma/client.cc b/cpp/src/plasma/client.cc index ce9795d20fc..a6cdf7f17ca 100644 --- a/cpp/src/plasma/client.cc +++ b/cpp/src/plasma/client.cc @@ -791,11 +791,12 @@ Status PlasmaClient::Impl::Seal(const ObjectID& object_id) { auto object_entry = objects_in_use_.find(object_id); if (object_entry == objects_in_use_.end()) { - return Status::PlasmaObjectNonexistent( - "Seal() called on an object without a reference to it"); + return MakePlasmaError(PlasmaErrorCode::PlasmaObjectNonexistent, + "Seal() called on an object without a reference to it"); } if (object_entry->second->is_sealed) { - return Status::PlasmaObjectAlreadySealed("Seal() called on an already sealed object"); + return MakePlasmaError(PlasmaErrorCode::PlasmaObjectAlreadySealed, + "Seal() called on an already sealed object"); } object_entry->second->is_sealed = true; @@ -896,7 +897,7 @@ Status PlasmaClient::Impl::Hash(const ObjectID& object_id, uint8_t* digest) { RETURN_NOT_OK(Get({object_id}, 0, &object_buffers)); // If the object was not retrieved, return false. if (!object_buffers[0].data) { - return Status::PlasmaObjectNonexistent("Object not found"); + return MakePlasmaError(PlasmaErrorCode::PlasmaObjectNonexistent, "Object not found"); } // Compute the hash. 
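// (Related hardening below: MurmurHash64A in plasma/common.cc, which backs
// UniqueID::hash(), now reads its 8-byte blocks with arrow::util::SafeLoad, a
// memcpy-style load, instead of dereferencing a possibly unaligned uint64_t
// pointer; this is the same UBSan fix as the SafeLoadAs changes in
// parquet/encoding.cc and parquet/file_reader.cc above.)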
uint64_t hash = ComputeObjectHash(object_buffers[0]); diff --git a/cpp/src/plasma/common.cc b/cpp/src/plasma/common.cc index 490aa158b33..bbcd2c9c3f1 100644 --- a/cpp/src/plasma/common.cc +++ b/cpp/src/plasma/common.cc @@ -18,6 +18,9 @@ #include "plasma/common.h" #include +#include + +#include "arrow/util/ubsan.h" #include "plasma/plasma_generated.h" @@ -25,8 +28,88 @@ namespace fb = plasma::flatbuf; namespace plasma { +namespace { + +const char kErrorDetailTypeId[] = "plasma::PlasmaStatusDetail"; + +class PlasmaStatusDetail : public arrow::StatusDetail { + public: + explicit PlasmaStatusDetail(PlasmaErrorCode code) : code_(code) {} + const char* type_id() const override { return kErrorDetailTypeId; } + std::string ToString() const override { + const char* type; + switch (code()) { + case PlasmaErrorCode::PlasmaObjectExists: + type = "Plasma object exists"; + break; + case PlasmaErrorCode::PlasmaObjectNonexistent: + type = "Plasma object is nonexistent"; + break; + case PlasmaErrorCode::PlasmaStoreFull: + type = "Plasma store is full"; + break; + case PlasmaErrorCode::PlasmaObjectAlreadySealed: + type = "Plasma object is already sealed"; + break; + default: + type = "Unknown plasma error"; + break; + } + return std::string(type); + } + PlasmaErrorCode code() const { return code_; } + + private: + PlasmaErrorCode code_; +}; + +bool IsPlasmaStatus(const arrow::Status& status, PlasmaErrorCode code) { + if (status.ok()) { + return false; + } + auto* detail = status.detail().get(); + return detail != nullptr && detail->type_id() == kErrorDetailTypeId && + static_cast(detail)->code() == code; +} + +} // namespace + using arrow::Status; +arrow::Status MakePlasmaError(PlasmaErrorCode code, std::string message) { + arrow::StatusCode arrow_code = arrow::StatusCode::UnknownError; + switch (code) { + case PlasmaErrorCode::PlasmaObjectExists: + arrow_code = arrow::StatusCode::AlreadyExists; + break; + case PlasmaErrorCode::PlasmaObjectNonexistent: + arrow_code = arrow::StatusCode::KeyError; + break; + case PlasmaErrorCode::PlasmaStoreFull: + arrow_code = arrow::StatusCode::CapacityError; + break; + case PlasmaErrorCode::PlasmaObjectAlreadySealed: + // Maybe a stretch? 
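+ // There is no exact Arrow category for "already sealed"; TypeError keeps
+ // the failure distinct, and callers that need precision can test the
+ // attached PlasmaStatusDetail via IsPlasmaObjectAlreadySealed().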
+ arrow_code = arrow::StatusCode::TypeError; + break; + } + return arrow::Status(arrow_code, std::move(message), + std::make_shared(code)); +} + +bool IsPlasmaObjectExists(const arrow::Status& status) { + return IsPlasmaStatus(status, PlasmaErrorCode::PlasmaObjectExists); +} +bool IsPlasmaObjectNonexistent(const arrow::Status& status) { + return IsPlasmaStatus(status, PlasmaErrorCode::PlasmaObjectNonexistent); +} +bool IsPlasmaObjectAlreadySealed(const arrow::Status& status) { + return IsPlasmaStatus(status, PlasmaErrorCode::PlasmaObjectAlreadySealed); +} +bool IsPlasmaStoreFull(const arrow::Status& status) { + return IsPlasmaStatus(status, PlasmaErrorCode::PlasmaStoreFull); +} + UniqueID UniqueID::from_binary(const std::string& binary) { UniqueID id; std::memcpy(&id, binary.data(), sizeof(id)); @@ -64,7 +147,7 @@ uint64_t MurmurHash64A(const void* key, int len, unsigned int seed) { const uint64_t* end = data + (len / 8); while (data != end) { - uint64_t k = *data++; + uint64_t k = arrow::util::SafeLoad(data++); k *= m; k ^= k >> r; diff --git a/cpp/src/plasma/common.h b/cpp/src/plasma/common.h index 6f4cef5becb..d42840cfbd2 100644 --- a/cpp/src/plasma/common.h +++ b/cpp/src/plasma/common.h @@ -41,6 +41,23 @@ namespace plasma { enum class ObjectLocation : int32_t { Local, Remote, Nonexistent }; +enum class PlasmaErrorCode : int8_t { + PlasmaObjectExists = 1, + PlasmaObjectNonexistent = 2, + PlasmaStoreFull = 3, + PlasmaObjectAlreadySealed = 4, +}; + +ARROW_EXPORT arrow::Status MakePlasmaError(PlasmaErrorCode code, std::string message); +/// Return true iff the status indicates an already existing Plasma object. +ARROW_EXPORT bool IsPlasmaObjectExists(const arrow::Status& status); +/// Return true iff the status indicates a non-existent Plasma object. +ARROW_EXPORT bool IsPlasmaObjectNonexistent(const arrow::Status& status); +/// Return true iff the status indicates an already sealed Plasma object. +ARROW_EXPORT bool IsPlasmaObjectAlreadySealed(const arrow::Status& status); +/// Return true iff the status indicates the Plasma store reached its capacity limit. 
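+/// (Corresponds to PlasmaErrorCode::PlasmaStoreFull, which MakePlasmaError
+/// surfaces as arrow::StatusCode::CapacityError.)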
+ARROW_EXPORT bool IsPlasmaStoreFull(const arrow::Status& status); + constexpr int64_t kUniqueIDSize = 20; class ARROW_EXPORT UniqueID { diff --git a/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc b/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc index 248c268c071..0964df46fd6 100644 --- a/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc +++ b/cpp/src/plasma/lib/java/org_apache_arrow_plasma_PlasmaClientJNI.cc @@ -111,13 +111,13 @@ JNIEXPORT jobject JNICALL Java_org_apache_arrow_plasma_PlasmaClientJNI_create( std::shared_ptr data; Status s = client->Create(oid, size, md, md_size, &data); - if (s.IsPlasmaObjectExists()) { + if (plasma::IsPlasmaObjectExists(s)) { jclass exceptionClass = env->FindClass("org/apache/arrow/plasma/exceptions/DuplicateObjectException"); env->ThrowNew(exceptionClass, oid.hex().c_str()); return nullptr; } - if (s.IsPlasmaStoreFull()) { + if (plasma::IsPlasmaStoreFull(s)) { jclass exceptionClass = env->FindClass("org/apache/arrow/plasma/exceptions/PlasmaOutOfMemoryException"); env->ThrowNew(exceptionClass, ""); diff --git a/cpp/src/plasma/protocol.cc b/cpp/src/plasma/protocol.cc index b87656bd097..c22d77d6019 100644 --- a/cpp/src/plasma/protocol.cc +++ b/cpp/src/plasma/protocol.cc @@ -86,11 +86,14 @@ Status PlasmaErrorStatus(fb::PlasmaError plasma_error) { case fb::PlasmaError::OK: return Status::OK(); case fb::PlasmaError::ObjectExists: - return Status::PlasmaObjectExists("object already exists in the plasma store"); + return MakePlasmaError(PlasmaErrorCode::PlasmaObjectExists, + "object already exists in the plasma store"); case fb::PlasmaError::ObjectNonexistent: - return Status::PlasmaObjectNonexistent("object does not exist in the plasma store"); + return MakePlasmaError(PlasmaErrorCode::PlasmaObjectNonexistent, + "object does not exist in the plasma store"); case fb::PlasmaError::OutOfMemory: - return Status::PlasmaStoreFull("object does not fit in the plasma store"); + return MakePlasmaError(PlasmaErrorCode::PlasmaStoreFull, + "object does not fit in the plasma store"); default: ARROW_LOG(FATAL) << "unknown plasma error code " << static_cast(plasma_error); } diff --git a/cpp/src/plasma/store.cc b/cpp/src/plasma/store.cc index c574d094a98..2c3361e3de7 100644 --- a/cpp/src/plasma/store.cc +++ b/cpp/src/plasma/store.cc @@ -215,10 +215,6 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si // ignore this requst. return PlasmaError::ObjectExists; } - auto ptr = std::unique_ptr(new ObjectTableEntry()); - entry = store_info_.objects.emplace(object_id, std::move(ptr)).first->second.get(); - entry->data_size = data_size; - entry->metadata_size = metadata_size; int fd = -1; int64_t map_size = 0; @@ -226,29 +222,35 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si uint8_t* pointer = nullptr; auto total_size = data_size + metadata_size; - if (device_num != 0) { + if (device_num == 0) { + pointer = AllocateMemory(total_size, &fd, &map_size, &offset); + if (!pointer) { + ARROW_LOG(ERROR) << "Not enough memory to create the object " << object_id.hex() + << ", data_size=" << data_size + << ", metadata_size=" << metadata_size + << ", will send a reply of PlasmaError::OutOfMemory"; + return PlasmaError::OutOfMemory; + } + } else { #ifdef PLASMA_CUDA - auto st = AllocateCudaMemory(device_num, total_size, &pointer, &entry->ipc_handle); + /// IPC GPU handle to share with clients. 
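+ /// The handle now stays local until allocation succeeds; the object table
+ /// entry, created further down, records it only after AllocateCudaMemory
+ /// returns OK, so a failed allocation no longer leaves a partial entry.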
+ std::shared_ptr<::arrow::cuda::CudaIpcMemHandle> ipc_handle; + auto st = AllocateCudaMemory(device_num, total_size, &pointer, &ipc_handle); if (!st.ok()) { ARROW_LOG(ERROR) << "Failed to allocate CUDA memory: " << st.ToString(); return PlasmaError::OutOfMemory; } - result->ipc_handle = entry->ipc_handle; + result->ipc_handle = ipc_handle; #else ARROW_LOG(ERROR) << "device_num != 0 but CUDA not enabled"; return PlasmaError::OutOfMemory; #endif - } else { - pointer = AllocateMemory(total_size, &fd, &map_size, &offset); - if (!pointer) { - ARROW_LOG(ERROR) << "Not enough memory to create the object " << object_id.hex() - << ", data_size=" << data_size - << ", metadata_size=" << metadata_size - << ", will send a reply of PlasmaError::OutOfMemory"; - return PlasmaError::OutOfMemory; - } } + auto ptr = std::unique_ptr(new ObjectTableEntry()); + entry = store_info_.objects.emplace(object_id, std::move(ptr)).first->second.get(); + entry->data_size = data_size; + entry->metadata_size = metadata_size; entry->pointer = pointer; // TODO(pcm): Set the other fields. entry->fd = fd; @@ -259,6 +261,10 @@ PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, int64_t data_si entry->create_time = std::time(nullptr); entry->construct_duration = -1; +#ifdef PLASMA_CUDA + entry->ipc_handle = result->ipc_handle; +#endif + result->store_fd = fd; result->data_offset = offset; result->metadata_offset = offset + data_size; diff --git a/cpp/src/plasma/test/client_tests.cc b/cpp/src/plasma/test/client_tests.cc index 435b687a69e..deffde57976 100644 --- a/cpp/src/plasma/test/client_tests.cc +++ b/cpp/src/plasma/test/client_tests.cc @@ -157,7 +157,7 @@ TEST_F(TestPlasmaStore, SealErrorsTest) { ObjectID object_id = random_object_id(); Status result = client_.Seal(object_id); - ASSERT_TRUE(result.IsPlasmaObjectNonexistent()); + ASSERT_TRUE(IsPlasmaObjectNonexistent(result)); // Create object. std::vector data(100, 0); @@ -165,7 +165,7 @@ TEST_F(TestPlasmaStore, SealErrorsTest) { // Trying to seal it again. 
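
The assertions in this test move from `Status` member checks to the new free-function predicates, which inspect the attached `StatusDetail` instead of a dedicated status code. A hedged caller-side sketch (the wrapper name and the idempotent-seal policy are illustrative, not from the codebase):

```cpp
#include "arrow/status.h"
#include "plasma/client.h"
#include "plasma/common.h"

// Treat double-sealing as success; propagate every other error unchanged.
arrow::Status SealIdempotent(plasma::PlasmaClient* client,
                             const plasma::ObjectID& id) {
  arrow::Status s = client->Seal(id);
  if (plasma::IsPlasmaObjectAlreadySealed(s)) {
    return arrow::Status::OK();
  }
  return s;
}
```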
result = client_.Seal(object_id); - ASSERT_TRUE(result.IsPlasmaObjectAlreadySealed()); + ASSERT_TRUE(IsPlasmaObjectAlreadySealed(result)); ARROW_CHECK_OK(client_.Release(object_id)); } diff --git a/cpp/src/plasma/test/serialization_tests.cc b/cpp/src/plasma/test/serialization_tests.cc index 7e2bc887ed3..f3cff428582 100644 --- a/cpp/src/plasma/test/serialization_tests.cc +++ b/cpp/src/plasma/test/serialization_tests.cc @@ -156,7 +156,7 @@ TEST_F(TestPlasmaSerialization, SealReply) { ObjectID object_id2; Status s = ReadSealReply(data.data(), data.size(), &object_id2); ASSERT_EQ(object_id1, object_id2); - ASSERT_TRUE(s.IsPlasmaObjectExists()); + ASSERT_TRUE(IsPlasmaObjectExists(s)); close(fd); } @@ -234,7 +234,7 @@ TEST_F(TestPlasmaSerialization, ReleaseReply) { ObjectID object_id2; Status s = ReadReleaseReply(data.data(), data.size(), &object_id2); ASSERT_EQ(object_id1, object_id2); - ASSERT_TRUE(s.IsPlasmaObjectExists()); + ASSERT_TRUE(IsPlasmaObjectExists(s)); close(fd); } diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index b17d01cf7a8..c9684040893 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -12,7 +12,7 @@ Apache Arrow library Copyright 2016-2019 The Apache Software Foundation The Apache Software Foundation - 0.14.0 + 1.0.0-SNAPSHOT diff --git a/csharp/src/Apache.Arrow/BitUtility.cs b/csharp/src/Apache.Arrow/BitUtility.cs index a5da46bb1c9..7d2cfbfd365 100644 --- a/csharp/src/Apache.Arrow/BitUtility.cs +++ b/csharp/src/Apache.Arrow/BitUtility.cs @@ -116,6 +116,14 @@ public static int CountBits(ReadOnlySpan data) public static long RoundUpToMultipleOf64(long n) => RoundUpToMultiplePowerOfTwo(n, 64); + /// + /// Rounds an integer to the nearest multiple of 8. + /// + /// Integer to round. + /// Integer rounded to the nearest multiple of 8. + public static long RoundUpToMultipleOf8(long n) => + RoundUpToMultiplePowerOfTwo(n, 8); + /// /// Rounds an integer up to the nearest multiple of factor, where /// factor must be a power of two. 
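
The new C# `RoundUpToMultipleOf8` delegates to `RoundUpToMultiplePowerOfTwo`, which is the usual mask trick: rounding up to a power-of-two multiple needs no division. The same arithmetic as a self-contained C++ sketch (an illustration of the idiom, not a port of the C# helper):

```cpp
#include <cassert>
#include <cstdint>

// Round n up to the nearest multiple of factor, where factor must be a
// power of two so that (factor - 1) is a contiguous low-bit mask.
int64_t RoundUpToMultiplePowerOfTwo(int64_t n, int64_t factor) {
  assert(factor > 0 && (factor & (factor - 1)) == 0);
  return (n + (factor - 1)) & ~(factor - 1);
}

// RoundUpToMultiplePowerOfTwo(3, 8) == 8
// RoundUpToMultiplePowerOfTwo(16, 8) == 16
```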
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 7696dfaa2e8..e1da4489ce4 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -45,17 +45,15 @@ internal class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor { - public struct Buffer + public readonly struct Buffer { public readonly ArrowBuffer DataBuffer; public readonly int Offset; - public readonly int Length; - public Buffer(ArrowBuffer buffer, int offset, int length) + public Buffer(ArrowBuffer buffer, int offset) { DataBuffer = buffer; Offset = offset; - Length = length; } } @@ -109,7 +107,7 @@ public void Visit(BinaryArray array) private void CreateBuffers(BooleanArray array) { - _buffers.Add(CreateBuffer(ArrowBuffer.Empty)); + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); _buffers.Add(CreateBuffer(array.ValueBuffer)); } @@ -124,9 +122,10 @@ private Buffer CreateBuffer(ArrowBuffer buffer) { var offset = TotalLength; - TotalLength += buffer.Length; + int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); + TotalLength += paddedLength; - return new Buffer(buffer, offset, buffer.Length); + return new Buffer(buffer, offset); } public void Visit(IArrowArray array) @@ -181,28 +180,26 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat HasWrittenSchema = true; } - var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder(); - Builder.Clear(); // Serialize field nodes var fieldCount = Schema.Fields.Count; - var fieldNodeOffsets = new Offset[fieldCount]; Flatbuf.RecordBatch.StartNodesVector(Builder, fieldCount); - for (var i = 0; i < fieldCount; i++) + // flatbuffer struct vectors have to be created in reverse order + for (var i = fieldCount - 1; i >= 0; i--) { var fieldArray = recordBatch.Column(i); - fieldNodeOffsets[i] = - Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount); + Flatbuf.FieldNode.CreateFieldNode(Builder, fieldArray.Length, fieldArray.NullCount); } var fieldNodesVectorOffset = Builder.EndVector(); // Serialize buffers + var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder(); for (var i = 0; i < fieldCount; i++) { var fieldArray = recordBatch.Column(i); @@ -213,10 +210,11 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count); + // flatbuffer struct vectors have to be created in reverse order for (var i = buffers.Count - 1; i >= 0; i--) { Flatbuf.Buffer.CreateBuffer(Builder, - buffers[i].Offset, buffers[i].Length); + buffers[i].Offset, buffers[i].DataBuffer.Length); } var buffersVectorOffset = Builder.EndVector(); @@ -239,11 +237,20 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat for (var i = 0; i < buffers.Count; i++) { - if (buffers[i].DataBuffer.IsEmpty) + ArrowBuffer buffer = buffers[i].DataBuffer; + if (buffer.IsEmpty) continue; - await WriteBufferAsync(buffers[i].DataBuffer, cancellationToken).ConfigureAwait(false); - bodyLength += buffers[i].DataBuffer.Length; + await WriteBufferAsync(buffer, cancellationToken).ConfigureAwait(false); + + int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); + int padding = paddedLength - buffer.Length; + if (padding > 0) + { + await WritePaddingAsync(padding).ConfigureAwait(false); + } + + bodyLength += paddedLength; } // Write padding so the record batch message 
body length is a multiple of 8 bytes @@ -333,7 +340,7 @@ private async ValueTask WriteMessageAsync( where T: struct { var messageOffset = Flatbuf.Message.CreateMessage( - Builder, CurrentMetadataVersion, headerType, headerOffset.Value, + Builder, CurrentMetadataVersion, headerType, headerOffset.Value, bodyLength); Builder.Finish(messageOffset.Value); diff --git a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs new file mode 100644 index 00000000000..5b19486ebaf --- /dev/null +++ b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Ipc; +using Apache.Arrow.Tests; +using BenchmarkDotNet.Attributes; +using System.IO; +using System.Threading.Tasks; + +namespace Apache.Arrow.Benchmarks +{ + //[EtwProfiler] - needs elevated privileges + [MemoryDiagnoser] + public class ArrowWriterBenchmark + { + [Params(10_000, 1_000_000)] + public int BatchLength{ get; set; } + + [Params(10, 25)] + public int ColumnSetCount { get; set; } + + private MemoryStream _memoryStream; + private RecordBatch _batch; + + [GlobalSetup] + public void GlobalSetup() + { + _batch = TestData.CreateSampleRecordBatch(BatchLength, ColumnSetCount); + _memoryStream = new MemoryStream(); + } + + [IterationSetup] + public void Setup() + { + _memoryStream.Position = 0; + } + + [Benchmark] + public async Task WriteBatch() + { + ArrowStreamWriter writer = new ArrowStreamWriter(_memoryStream, _batch.Schema); + await writer.WriteRecordBatchAsync(_batch); + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs index 3ef747d61be..06be8bd6504 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs @@ -14,8 +14,10 @@ // limitations under the License. 
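
Summing up the `ArrowStreamWriter` change above: buffer offsets are computed from padded lengths, the flatbuffer still records each buffer's actual length, and the writer now emits the zero padding it accounted for, so every buffer body starts on an 8-byte boundary. The per-buffer bookkeeping, as a small C++ sketch (names are mine, not from the C# sources):

```cpp
#include <cstdint>

struct PaddedBuffer {
  int64_t offset;         // where this buffer starts in the message body
  int64_t padding_bytes;  // zero bytes to write after the buffer contents
};

// Buffers are laid out back-to-back; each is padded so the next one starts
// on an 8-byte boundary.
PaddedBuffer LayOutBuffer(int64_t* total_length, int64_t buffer_length) {
  const int64_t padded = (buffer_length + 7) & ~int64_t{7};
  PaddedBuffer out{*total_length, padded - buffer_length};
  *total_length += padded;
  return out;
}
```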
using Apache.Arrow.Ipc; +using Apache.Arrow.Types; using System; using System.IO; +using System.Linq; using System.Net; using System.Net.Sockets; using System.Threading.Tasks; @@ -89,6 +91,39 @@ public async Task WriteEmptyBatch() { RecordBatch originalBatch = TestData.CreateSampleRecordBatch(length: 0); + await TestRoundTripRecordBatch(originalBatch); + } + + [Fact] + public async Task WriteBatchWithNulls() + { + RecordBatch originalBatch = new RecordBatch.Builder() + .Append("Column1", false, col => col.Int32(array => array.AppendRange(Enumerable.Range(0, 10)))) + .Append("Column2", true, new Int32Array( + valueBuffer: new ArrowBuffer.Builder().AppendRange(Enumerable.Range(0, 10)).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder().Append(0xfd).Append(0xff).Build(), + length: 10, + nullCount: 2, + offset: 0)) + .Append("Column3", true, new Int32Array( + valueBuffer: new ArrowBuffer.Builder().AppendRange(Enumerable.Range(0, 10)).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder().Append(0x00).Append(0x00).Build(), + length: 10, + nullCount: 10, + offset: 0)) + .Append("NullableBooleanColumn", true, new BooleanArray( + valueBuffer: new ArrowBuffer.Builder().Append(0xfd).Append(0xff).Build(), + nullBitmapBuffer: new ArrowBuffer.Builder().Append(0xed).Append(0xff).Build(), + length: 10, + nullCount: 3, + offset: 0)) + .Build(); + + await TestRoundTripRecordBatch(originalBatch); + } + + private static async Task TestRoundTripRecordBatch(RecordBatch originalBatch) + { using (MemoryStream stream = new MemoryStream()) { using (var writer = new ArrowStreamWriter(stream, originalBatch.Schema, leaveOpen: true)) @@ -105,5 +140,60 @@ public async Task WriteEmptyBatch() } } } + + [Fact] + public async Task WriteBatchWithCorrectPadding() + { + byte value1 = 0x04; + byte value2 = 0x14; + var batch = new RecordBatch( + new Schema.Builder() + .Field(f => f.Name("age").DataType(Int32Type.Default)) + .Field(f => f.Name("characterCount").DataType(Int32Type.Default)) + .Build(), + new IArrowArray[] + { + new Int32Array( + new ArrowBuffer(new byte[] { value1, value1, 0x00, 0x00 }), + ArrowBuffer.Empty, + length: 1, + nullCount: 0, + offset: 0), + new Int32Array( + new ArrowBuffer(new byte[] { value2, value2, 0x00, 0x00 }), + ArrowBuffer.Empty, + length: 1, + nullCount: 0, + offset: 0) + }, + length: 1); + + await TestRoundTripRecordBatch(batch); + + using (MemoryStream stream = new MemoryStream()) + { + using (var writer = new ArrowStreamWriter(stream, batch.Schema, leaveOpen: true)) + { + await writer.WriteRecordBatchAsync(batch); + } + + byte[] writtenBytes = stream.ToArray(); + + // ensure that the data buffers at the end are 8-byte aligned + Assert.Equal(value1, writtenBytes[writtenBytes.Length - 16]); + Assert.Equal(value1, writtenBytes[writtenBytes.Length - 15]); + for (int i = 14; i > 8; i--) + { + Assert.Equal(0, writtenBytes[writtenBytes.Length - i]); + } + + Assert.Equal(value2, writtenBytes[writtenBytes.Length - 8]); + Assert.Equal(value2, writtenBytes[writtenBytes.Length - 7]); + for (int i = 6; i > 0; i--) + { + Assert.Equal(0, writtenBytes[writtenBytes.Length - i]); + } + } + } } } diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index 1bc046dd74c..15774a75c43 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -22,27 +22,35 @@ namespace Apache.Arrow.Tests public static class TestData { public static RecordBatch CreateSampleRecordBatch(int length) + { + return 
CreateSampleRecordBatch(length, columnSetCount: 1); + } + + public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount) { Schema.Builder builder = new Schema.Builder(); - builder.Field(CreateField(BooleanType.Default)); - builder.Field(CreateField(UInt8Type.Default)); - builder.Field(CreateField(Int8Type.Default)); - builder.Field(CreateField(UInt16Type.Default)); - builder.Field(CreateField(Int16Type.Default)); - builder.Field(CreateField(UInt32Type.Default)); - builder.Field(CreateField(Int32Type.Default)); - builder.Field(CreateField(UInt64Type.Default)); - builder.Field(CreateField(Int64Type.Default)); - builder.Field(CreateField(FloatType.Default)); - builder.Field(CreateField(DoubleType.Default)); - //builder.Field(CreateField(new DecimalType(19, 2))); - //builder.Field(CreateField(HalfFloatType.Default)); - //builder.Field(CreateField(StringType.Default)); - //builder.Field(CreateField(Date32Type.Default)); - //builder.Field(CreateField(Date64Type.Default)); - //builder.Field(CreateField(Time32Type.Default)); - //builder.Field(CreateField(Time64Type.Default)); - //builder.Field(CreateField(TimestampType.Default)); + for (int i = 0; i < columnSetCount; i++) + { + builder.Field(CreateField(BooleanType.Default, i)); + builder.Field(CreateField(UInt8Type.Default, i)); + builder.Field(CreateField(Int8Type.Default, i)); + builder.Field(CreateField(UInt16Type.Default, i)); + builder.Field(CreateField(Int16Type.Default, i)); + builder.Field(CreateField(UInt32Type.Default, i)); + builder.Field(CreateField(Int32Type.Default, i)); + builder.Field(CreateField(UInt64Type.Default, i)); + builder.Field(CreateField(Int64Type.Default, i)); + builder.Field(CreateField(FloatType.Default, i)); + builder.Field(CreateField(DoubleType.Default, i)); + //builder.Field(CreateField(new DecimalType(19, 2))); + //builder.Field(CreateField(HalfFloatType.Default)); + //builder.Field(CreateField(StringType.Default)); + //builder.Field(CreateField(Date32Type.Default)); + //builder.Field(CreateField(Date64Type.Default)); + //builder.Field(CreateField(Time32Type.Default)); + //builder.Field(CreateField(Time64Type.Default)); + //builder.Field(CreateField(TimestampType.Default)); + } Schema schema = builder.Build(); @@ -51,9 +59,9 @@ public static RecordBatch CreateSampleRecordBatch(int length) return new RecordBatch(schema, arrays, length); } - private static Field CreateField(ArrowType type) + private static Field CreateField(ArrowType type, int iteration) { - return new Field(type.Name, type, nullable: false); + return new Field(type.Name + iteration, type, nullable: false); } private static IEnumerable CreateArrays(Schema schema, int length) diff --git a/dev/fuzzit/Dockerfile b/dev/fuzzit/Dockerfile new file mode 100644 index 00000000000..602cb5f254c --- /dev/null +++ b/dev/fuzzit/Dockerfile @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM arrow:cpp + +RUN apt-get update && \ + apt-get install -y -q \ + clang-7 \ + libclang-7-dev \ + clang-format-7 \ + clang-tidy-7 \ + clang-tools-7 + +CMD ["arrow/ci/docker_build_and_fuzzit.sh"] diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index b295ac5e658..623e3352ae5 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -28,8 +28,8 @@ # There are several pieces of authorization possibly needed via environment # variables # -# JIRA_USERNAME: your Apache JIRA id -# JIRA_PASSWORD: your Apache JIRA password +# APACHE_JIRA_USERNAME: your Apache JIRA id +# APACHE_JIRA_PASSWORD: your Apache JIRA password # ARROW_GITHUB_API_TOKEN: a GitHub API token to use for API requests (to avoid # rate limiting) @@ -52,6 +52,16 @@ print("Exiting without trying to close the associated JIRA.") sys.exit(1) +# Remote name which points to the GitHub site +PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache") + +# For testing to avoid accidentally pushing to apache +DEBUG = bool(int(os.environ.get("DEBUG", 0))) + + +if DEBUG: + print("**************** DEBUGGING ****************") + # Prefix added to temporary branches BRANCH_PREFIX = "PR_TOOL" @@ -339,7 +349,27 @@ def merge(self, target_ref='master'): distinct_authors = sorted(set(commit_authors), key=lambda x: commit_authors.count(x), reverse=True) + + for i, author in enumerate(distinct_authors): + print("Author {}: {}".format(i + 1, author)) + + if len(distinct_authors) > 1: + primary_author = self.cmd.prompt( + "Enter primary author in the format of " + "\"name <email>\" [%s]: " % distinct_authors[0]) + + if primary_author == "": + primary_author = distinct_authors[0] + else: + # When the primary author is specified manually, de-dup it from + # the author list and put it at the head of the author list.
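+ # (hypothetical example: distinct_authors of + # ["Alice <alice@example.com>", "Bob <bob@example.com>"] with + # "Bob <bob@example.com>" entered as the primary author becomes + # ["Bob <bob@example.com>", "Alice <alice@example.com>"])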
+ distinct_authors = [x for x in distinct_authors + if x != primary_author] + distinct_authors = [primary_author] + distinct_authors + else: + # If there is only one author, do not prompt for a lead author + primary_author = distinct_authors[0] + commits = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name, '--pretty=format:%h <%an> %s']).split("\n\n") @@ -349,9 +379,17 @@ def merge(self, target_ref='master'): if self.body is not None: merge_message_flags += ["-m", self.body] - authors = "\n".join(["Author: %s" % a for a in distinct_authors]) + committer_name = run_cmd("git config --get user.name").strip() + committer_email = run_cmd("git config --get user.email").strip() - merge_message_flags += ["-m", authors] + authors = ("Authored-by:" if len(distinct_authors) == 1 + else "Lead-authored-by:") + authors += " %s" % (distinct_authors.pop(0)) + if len(distinct_authors) > 0: + authors += "\n" + "\n".join(["Co-authored-by: %s" % a + for a in distinct_authors]) + authors += "\n" + "Signed-off-by: %s <%s>" % (committer_name, + committer_email) if had_conflicts: committer_name = run_cmd("git config --get user.name").strip() @@ -371,6 +409,11 @@ def merge(self, target_ref='master'): stripped_message = strip_ci_directives(c).strip() merge_message_flags += ["-m", stripped_message] + merge_message_flags += ["-m", authors] + + if DEBUG: + print("\n".join(merge_message_flags)) + run_cmd(['git', 'commit', '--no-verify', # do not run commit hooks '--author="%s"' % primary_author] + @@ -380,9 +423,13 @@ def merge(self, target_ref='master'): % (target_branch_name, self.git_remote)) try: - run_cmd('git push %s %s:%s' % (self.git_remote, - target_branch_name, - target_ref)) + push_cmd = ('git push %s %s:%s' % (self.git_remote, + target_branch_name, + target_ref)) + if DEBUG: + print(push_cmd) + else: + run_cmd(push_cmd) except Exception as e: clean_up() self.cmd.fail("Exception while pushing: %s" % e) @@ -415,17 +462,17 @@ def get_version_json(version_str): def connect_jira(cmd): # ASF JIRA username - jira_username = os.environ.get("JIRA_USERNAME") + jira_username = os.environ.get("APACHE_JIRA_USERNAME") # ASF JIRA password - jira_password = os.environ.get("JIRA_PASSWORD") + jira_password = os.environ.get("APACHE_JIRA_PASSWORD") if not jira_username: - jira_username = cmd.prompt("Env JIRA_USERNAME not set, " + jira_username = cmd.prompt("Env APACHE_JIRA_USERNAME not set, " "please enter your JIRA username:") if not jira_password: - jira_password = cmd.getpass("Env JIRA_PASSWORD not set, " + jira_password = cmd.getpass("Env APACHE_JIRA_PASSWORD not set, " "please enter " "your JIRA password:") @@ -444,15 +491,12 @@ def cli(): pr_num = input("Which pull request would you like to merge? (e.g. 
34): ") - # Remote name which points to the GitHub site - git_remote = os.environ.get("PR_REMOTE_NAME", "apache") - os.chdir(ARROW_HOME) github_api = GitHubAPI(PROJECT_NAME) jira_con = connect_jira(cmd) - pr = PullRequest(cmd, github_api, git_remote, jira_con, pr_num) + pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num) if pr.is_merged: print("Pull request %s has already been merged") diff --git a/dev/release/00-prepare-test.rb b/dev/release/00-prepare-test.rb index 77be164e0fe..77a048a372c 100644 --- a/dev/release/00-prepare-test.rb +++ b/dev/release/00-prepare-test.rb @@ -85,6 +85,13 @@ def test_update_version_pre_tag "+version = '#{@release_version}'"], ], }, + { + path: "ci/PKGBUILD", + hunks: [ + ["-pkgver=#{@previous_version}.9000", + "+pkgver=#{@release_version}"], + ], + }, { path: "cpp/CMakeLists.txt", hunks: [ @@ -181,10 +188,10 @@ def test_update_version_pre_tag hunks: [ ["-version = \"#{@snapshot_version}\"", "+version = \"#{@release_version}\""], - ["-arrow = { path = \"../arrow\" }", - "-parquet = { path = \"../parquet\" }", - "+arrow = \"#{@release_version}\"", - "+parquet = \"#{@release_version}\""] + ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\" }", + "-parquet = { path = \"../parquet\", version = \"#{@snapshot_version}\" }", + "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }", + "+parquet = { path = \"../parquet\", version = \"#{@release_version}\" }"] ], }, { @@ -199,8 +206,8 @@ def test_update_version_pre_tag hunks: [ ["-version = \"#{@snapshot_version}\"", "+version = \"#{@release_version}\""], - ["-arrow = { path = \"../arrow\" }", - "+arrow = \"#{@release_version}\""] + ["-arrow = { path = \"../arrow\", version = \"#{@snapshot_version}\" }", + "+arrow = { path = \"../arrow\", version = \"#{@release_version}\" }"] ], }, { @@ -224,49 +231,56 @@ def test_update_version_post_tag path: "c_glib/configure.ac", hunks: [ ["-m4_define([arrow_glib_version], #{@release_version})", - "+m4_define([arrow_glib_version], #{@next_version}-SNAPSHOT)"], + "+m4_define([arrow_glib_version], #{@next_snapshot_version})"], ], }, { path: "c_glib/meson.build", hunks: [ ["-version = '#{@release_version}'", - "+version = '#{@next_version}-SNAPSHOT'"], + "+version = '#{@next_snapshot_version}'"], + ], + }, + { + path: "ci/PKGBUILD", + hunks: [ + ["-pkgver=#{@release_version}", + "+pkgver=#{@release_version}.9000"], ], }, { path: "cpp/CMakeLists.txt", hunks: [ ["-set(ARROW_VERSION \"#{@release_version}\")", - "+set(ARROW_VERSION \"#{@next_version}-SNAPSHOT\")"], + "+set(ARROW_VERSION \"#{@next_snapshot_version}\")"], ], }, { path: "csharp/Directory.Build.props", hunks: [ ["- #{@release_version}", - "+ #{@next_version}-SNAPSHOT"], + "+ #{@next_snapshot_version}"], ], }, { path: "js/package.json", hunks: [ ["- \"version\": \"#{@release_version}\"", - "+ \"version\": \"#{@next_version}-SNAPSHOT\""], + "+ \"version\": \"#{@next_snapshot_version}\""], ], }, { path: "matlab/CMakeLists.txt", hunks: [ ["-set(MLARROW_VERSION \"#{@release_version}\")", - "+set(MLARROW_VERSION \"#{@next_version}-SNAPSHOT\")"], + "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], ], }, { path: "python/setup.py", hunks: [ ["-default_version = '#{@release_version}'", - "+default_version = '#{@next_version}-SNAPSHOT'"], + "+default_version = '#{@next_snapshot_version}'"], ], }, { @@ -288,78 +302,78 @@ def test_update_version_post_tag path: "ruby/red-arrow-cuda/lib/arrow-cuda/version.rb", hunks: [ ["- VERSION = \"#{@release_version}\"", - "+ VERSION = 
\"#{@next_version}-SNAPSHOT\""], + "+ VERSION = \"#{@next_snapshot_version}\""], ], }, { path: "ruby/red-arrow/lib/arrow/version.rb", hunks: [ ["- VERSION = \"#{@release_version}\"", - "+ VERSION = \"#{@next_version}-SNAPSHOT\""], + "+ VERSION = \"#{@next_snapshot_version}\""], ], }, { path: "ruby/red-gandiva/lib/gandiva/version.rb", hunks: [ ["- VERSION = \"#{@release_version}\"", - "+ VERSION = \"#{@next_version}-SNAPSHOT\""], + "+ VERSION = \"#{@next_snapshot_version}\""], ], }, { path: "ruby/red-parquet/lib/parquet/version.rb", hunks: [ ["- VERSION = \"#{@release_version}\"", - "+ VERSION = \"#{@next_version}-SNAPSHOT\""], + "+ VERSION = \"#{@next_snapshot_version}\""], ], }, { path: "ruby/red-plasma/lib/plasma/version.rb", hunks: [ ["- VERSION = \"#{@release_version}\"", - "+ VERSION = \"#{@next_version}-SNAPSHOT\""], + "+ VERSION = \"#{@next_snapshot_version}\""], ], }, { path: "rust/arrow/Cargo.toml", hunks: [ ["-version = \"#{@release_version}\"", - "+version = \"#{@next_version}-SNAPSHOT\""], + "+version = \"#{@next_snapshot_version}\""], ], }, { path: "rust/datafusion/Cargo.toml", hunks: [ ["-version = \"#{@release_version}\"", - "+version = \"#{@next_version}-SNAPSHOT\""], - ["-arrow = \"#{@release_version}\"", - "-parquet = \"#{@release_version}\"", - "+arrow = { path = \"../arrow\" }", - "+parquet = { path = \"../parquet\" }"] + "+version = \"#{@next_snapshot_version}\""], + ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\" }", + "-parquet = { path = \"../parquet\", version = \"#{@release_version}\" }", + "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }", + "+parquet = { path = \"../parquet\", version = \"#{@next_snapshot_version}\" }"] ], }, { path: "rust/datafusion/README.md", hunks: [ ["-datafusion = \"#{@release_version}\"", - "+datafusion = \"#{@next_version}-SNAPSHOT\""], + "+datafusion = \"#{@next_snapshot_version}\""], ], }, { path: "rust/parquet/Cargo.toml", hunks: [ ["-version = \"#{@release_version}\"", - "+version = \"#{@next_version}-SNAPSHOT\""], - ["-arrow = \"#{@release_version}\"", - "+arrow = { path = \"../arrow\" }"] + "+version = \"#{@next_snapshot_version}\""], + ["-arrow = { path = \"../arrow\", version = \"#{@release_version}\" }", + "+arrow = { path = \"../arrow\", version = \"#{@next_snapshot_version}\" }"] ], }, { path: "rust/parquet/README.md", hunks: [ ["-parquet = \"#{@release_version}\"", - "+parquet = \"#{@next_version}-SNAPSHOT\""], + "+parquet = \"#{@next_snapshot_version}\""], ["-See [crate documentation](https://docs.rs/crate/parquet/#{@release_version}) on available API.", - "+See [crate documentation](https://docs.rs/crate/parquet/#{@next_version}-SNAPSHOT) on available API."], + "+See [crate documentation](https://docs.rs/crate/parquet/#{@next_snapshot_version}) on available API."], ], }, ], diff --git a/dev/release/00-prepare.sh b/dev/release/00-prepare.sh index b51ba7b893c..34989fb7758 100755 --- a/dev/release/00-prepare.sh +++ b/dev/release/00-prepare.sh @@ -101,6 +101,14 @@ update_versions() { git add DESCRIPTION cd - + cd "${SOURCE_DIR}/../../ci" + sed -i.bak -E -e \ + "s/^pkgver=.+/pkgver=${r_version}/" \ + PKGBUILD + rm -f PKGBUILD.bak + git add PKGBUILD + cd - + cd "${SOURCE_DIR}/../../r" if [ ${type} = "snapshot" ]; then # Add a news entry for the new dev version @@ -128,20 +136,11 @@ update_versions() { cd - cd "${SOURCE_DIR}/../../rust" - sed -i.bak -E -e \ - "s/^version = \".+\"/version = \"${version}\"/g" \ + sed -i.bak -E \ + -e "s/^version = \".+\"/version = \"${version}\"/g" \ + 
-e "s/^(arrow = .* version = )\".+\"( .*)/\\1\"${version}\"\\2/g" \ + -e "s/^(parquet = .* version = )\".+\"( .*)/\\1\"${version}\"\\2/g" \ */Cargo.toml - if [ ${type} = "snapshot" ]; then - sed -i.bak -E \ - -e "s/^arrow = \".+\"/arrow = { path = \"..\/arrow\" }/g" \ - -e "s/^parquet = \".+\"/parquet = { path = \"..\/parquet\" }/g" \ - */Cargo.toml - else - sed -i.bak -E \ - -e "s/^arrow = \{ path = \".+\" \}/arrow = \"${version}\"/g" \ - -e "s/^parquet = \{ path = \".+\" \}/parquet = \"${version}\"/g" \ - */Cargo.toml - fi rm -f */Cargo.toml.bak git add */Cargo.toml diff --git a/dev/release/02-source-test.rb b/dev/release/02-source-test.rb index e777c7561c4..ac911cd1e36 100644 --- a/dev/release/02-source-test.rb +++ b/dev/release/02-source-test.rb @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. -require "rexml/document" - class SourceTest < Test::Unit::TestCase include GitRunnable include VersionDetectable @@ -42,23 +40,20 @@ def source(*targets) targets.each do |target| env["SOURCE_#{target}"] = "1" end - sh(env, @script, @release_version, "0") + output = sh(env, @script, @release_version, "0") sh("tar", "xf", "#{@tag_name}.tar.gz") + output end def test_symbolic_links source - Dir.chdir("#{@tag_name}") do + Dir.chdir(@tag_name) do assert_equal([], Find.find(".").find_all {|path| File.symlink?(path)}) end end def test_glib_configure - unless ENV["ARROW_TEST_SOURCE_GLIB"] == "yes" - omit("This takes a long time. " + - "Set ARROW_TEST_SOURCE_GLIB=yes environment variable to test this.") - end source("GLIB") Dir.chdir("#{@tag_name}/c_glib") do assert_equal([ @@ -102,4 +97,55 @@ def test_python_version Dir.glob("dist/pyarrow-*.tar.gz")) end end + + def test_vote + jira_url = "https://issues.apache.org/jira" + jql_conditions = [ + "project = ARROW", + "status in (Resolved, Closed)", + "fixVersion = #{@release_version}", + ] + jql = jql_conditions.join(" AND ") + n_resolved_issues = nil + open("#{jira_url}/rest/api/2/search?jql=#{CGI.escape(jql)}") do |response| + n_resolved_issues = JSON.parse(response.read)["total"] + end + output = source("VOTE") + assert_equal(<<-VOTE.strip, output[/^-+$(.+?)^-+$/m, 1].strip) +To: dev@arrow.apache.org +Subject: [VOTE] Release Apache Arrow #{@release_version} - RC0 + +Hi, + +I would like to propose the following release candidate (RC0) of Apache +Arrow version #{@release_version}. This is a release consiting of #{n_resolved_issues} +resolved JIRA issues[1]. + +This release candidate is based on commit: +#{@current_commit} [2] + +The source release rc0 is hosted at [3]. +The binary artifacts are hosted at [4][5][6][7]. +The changelog is located at [8]. + +Please download, verify checksums and signatures, run the unit tests, +and vote on the release. See [9] for how to validate a release candidate. + +The vote will be open for at least 72 hours. + +[ ] +1 Release this as Apache Arrow #{@release_version} +[ ] +0 +[ ] -1 Do not release this as Apache Arrow #{@release_version} because... 
+[1]: https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20#{@release_version} +[2]: https://github.com/apache/arrow/tree/#{@current_commit} +[3]: https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-#{@release_version}-rc0 +[4]: https://bintray.com/apache/arrow/centos-rc/#{@release_version}-rc0 +[5]: https://bintray.com/apache/arrow/debian-rc/#{@release_version}-rc0 +[6]: https://bintray.com/apache/arrow/python-rc/#{@release_version}-rc0 +[7]: https://bintray.com/apache/arrow/ubuntu-rc/#{@release_version}-rc0 +[8]: https://github.com/apache/arrow/blob/#{@current_commit}/CHANGELOG.md +[9]: https://cwiki.apache.org/confluence/display/ARROW/How+to+Verify+Release+Candidates + VOTE + end end diff --git a/dev/release/02-source.sh b/dev/release/02-source.sh index edaeec9ebe7..f70503a9f5c 100755 --- a/dev/release/02-source.sh +++ b/dev/release/02-source.sh @@ -21,9 +21,10 @@ set -e : ${SOURCE_DEFAULT:=1} -: ${SOURCE_UPLOAD:=${SOURCE_DEFAULT}} : ${SOURCE_GLIB:=${SOURCE_DEFAULT}} : ${SOURCE_RAT:=${SOURCE_DEFAULT}} +: ${SOURCE_UPLOAD:=${SOURCE_DEFAULT}} +: ${SOURCE_VOTE:=${SOURCE_DEFAULT}} SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" @@ -38,6 +39,7 @@ rc=$2 tag=apache-arrow-${version} tagrc=${tag}-rc${rc} +rc_url="https://dist.apache.org/repos/dist/dev/arrow/${tagrc}" echo "Preparing source for tag ${tag}" @@ -61,26 +63,22 @@ rm -rf ${tag} git archive ${release_hash} --prefix ${tag}/) | \ tar xf - -# replace c_glib/ by tar.gz generated by "make dist" +# Replace c_glib/ after running c_glib/autogen.sh to create c_glib/ source archive containing the configure script if [ ${SOURCE_GLIB} -gt 0 ]; then archive_name=tmp-apache-arrow (cd "${SOURCE_TOP_DIR}" && \ git archive ${release_hash} --prefix ${archive_name}/) \ - > ${archive_name}.tar - dist_c_glib_tar_gz=c_glib.tar.gz - docker_image_name=apache-arrow/release-source - DEBUG=yes docker build -t ${docker_image_name} "${SOURCE_DIR}/source" - docker \ run \ --rm \ --interactive \ --volume "$PWD":/host \ ${docker_image_name} \ /build.sh ${archive_name} ${dist_c_glib_tar_gz} - rm -f ${archive_name}.tar + > "${SOURCE_TOP_DIR}/${archive_name}.tar" + c_glib_including_configure_tar_gz=c_glib.tar.gz + "${SOURCE_TOP_DIR}/dev/run_docker_compose.sh" \ + release-source \ + /arrow/dev/release/source/build.sh \ + ${archive_name} \ + ${c_glib_including_configure_tar_gz} + rm -f "${SOURCE_TOP_DIR}/${archive_name}.tar" rm -rf ${tag}/c_glib - tar xf ${dist_c_glib_tar_gz} -C ${tag} - rm -f ${dist_c_glib_tar_gz} + tar xf "${SOURCE_TOP_DIR}/${c_glib_including_configure_tar_gz}" -C ${tag} + rm -f "${SOURCE_TOP_DIR}/${c_glib_including_configure_tar_gz}" fi # Resolve all hard and symbolic links @@ -128,54 +126,56 @@ if [ ${SOURCE_UPLOAD} -gt 0 ]; then # clean up rm -rf tmp - rc_url="https://dist.apache.org/repos/dist/dev/arrow/${tagrc}" echo "Success!
The release candidate is available here:" echo " ${rc_url}" echo "" echo "Commit SHA1: ${release_hash}" echo "" +fi + +if [ ${SOURCE_VOTE} -gt 0 ]; then echo "The following draft email has been created to send to the" echo "dev@arrow.apache.org mailing list" echo "" echo "---------------------------------------------------------" - # jira_url="https://issues.apache.org/jira" + jira_url="https://issues.apache.org/jira" jql="project%20%3D%20ARROW%20AND%20status%20in%20%28Resolved%2C%20Closed%29%20AND%20fixVersion%20%3D%20${version}" n_resolved_issues=$(curl "${jira_url}/rest/api/2/search/?jql=${jql}" | jq ".total") cat < ${output} ;; esac - upload_file ${version} ${rc} ${target} ${output} ${upload_path}.${suffix} + local need_upload=no + if [ "${source_upload}" = "yes" ]; then + need_upload=yes + elif ! curl \ + --fail \ + --head \ + ${BINTRAY_DOWNLOAD_URL_BASE}${download_path}.${suffix}; then + need_upload=yes + fi + if [ "${need_upload}" = "yes" ]; then + upload_file ${version} ${rc} ${target} ${output} ${upload_path}.${suffix} + fi rm -rf ${output_dir} popd done @@ -296,6 +328,7 @@ upload_deb() { for base_path in *; do upload_deb_file ${version} ${rc} ${distribution} ${code_name} ${base_path} & done + wait } upload_apt() { @@ -426,6 +459,7 @@ upload_rpm() { ${distribution_version} \ ${rpm_path} & done + wait } upload_yum() { @@ -495,6 +529,7 @@ upload_python() { ${base_path} \ ${version}-rc${rc}/${base_path} & done + wait } docker build -t ${docker_image_name} ${SOURCE_DIR}/binary diff --git a/dev/release/generate_force_push_script.py b/dev/release/generate_force_push_script.py new file mode 100755 index 00000000000..b6cd760bc60 --- /dev/null +++ b/dev/release/generate_force_push_script.py @@ -0,0 +1,61 @@ +#!/usr/bin/python +############################################################################## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +############################################################################## + +# This script generates a series of shell commands +# to rebase all open pull requests off of master +# and force push the updates. 
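+# +# A sketch of the generated commands for a hypothetical open PR whose +# head label is "alice:fix-foo" (i.e. head repository "alice/arrow"): +# +#   git clone git@github.com:alice/arrow.git +#   cd arrow +#   git remote add upstream https://github.com/apache/arrow.git +#   git fetch --all --prune --tags --force +#   git checkout fix-foo +#   (git rebase upstream/master && git push --force) || (echo "Rebase failed for alice:fix-foo" && git rebase --abort) +#   cd .. +#   rm -rf arrow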
+ +from http.client import HTTPSConnection +import json +from collections import defaultdict + +client = HTTPSConnection('api.github.com') +client.request('GET', + '/repos/apache/arrow/pulls?state=open&per_page=100', + headers={'User-Agent': 'ApacheArrowRebaser'}) +response = client.getresponse() +json_content = response.read() +if response.status != 200: + error_msg = 'GitHub connection error:{}'.format(json_content) + raise Exception(error_msg) + +parsed_content = json.loads(json_content) +if len(parsed_content) == 100: + print("# WARNING: Only the most recent 100 PRs will be processed") + +repos = defaultdict(list) +for pr in parsed_content: + head = pr['head'] + repos[head['repo']['full_name']].append(head['label']) + +for repo, labels in repos.items(): + print('git clone git@github.com:{}.git'.format(repo)) + print('cd arrow') + print('git remote add upstream https://github.com/apache/arrow.git') + print('git fetch --all --prune --tags --force') + for label in labels: + # Labels are in the form 'user:branch' + owner, branch = label.split(':') + print('git checkout {}'.format(branch)) + print('(git rebase upstream/master && git push --force) || ' + + '(echo "Rebase failed for {}" && '.format(label) + + 'git rebase --abort)') + print('cd ..') + print('rm -rf arrow') diff --git a/dev/release/post-02-binary.sh b/dev/release/post-02-binary.sh index cdff1db0ab6..d2788409a71 100755 --- a/dev/release/post-02-binary.sh +++ b/dev/release/post-02-binary.sh @@ -43,6 +43,8 @@ fi : ${BINTRAY_REPOSITORY:=apache/arrow} +BINTRAY_DOWNLOAD_URL_BASE=https://dl.bintray.com + docker_image_name=apache-arrow/release-binary bintray() { @@ -100,9 +102,8 @@ download_files() { --fail \ --location \ --output ${file} \ - https://dl.bintray.com/${BINTRAY_REPOSITORY}/${file} & + ${BINTRAY_DOWNLOAD_URL_BASE}/${BINTRAY_REPOSITORY}/${file} done - wait } delete_file() { @@ -120,6 +121,15 @@ upload_file() { local path=$3 local sha256=$(shasum -a 256 ${path} | awk '{print $1}') + local download_path=/${BINTRAY_REPOSITORY}/${target}/${path} + if curl \ + --fail \ + --head \ + ${BINTRAY_DOWNLOAD_URL_BASE}${download_path} | \ + grep -q "^X-Checksum-Sha2: ${sha256}"; then + return 0 + fi + local request_path=/content/${BINTRAY_REPOSITORY}/${target}/${version}/${target}/${path} if ! bintray \ PUT ${request_path} \ @@ -152,9 +162,8 @@ for target in debian ubuntu centos python; do mv ${version}-rc${rc} ${version} fi for file in $(find . -type f); do - upload_file ${version} ${target} ${file} & + upload_file ${version} ${target} ${file} done - wait popd popd rm -rf ${tmp_dir} diff --git a/dev/release/post-03-website.sh b/dev/release/post-03-website.sh index 86a806c3b6a..826dc4e4528 100755 --- a/dev/release/post-03-website.sh +++ b/dev/release/post-03-website.sh @@ -21,6 +21,7 @@ set -e set -u SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="${SOURCE_DIR}/../.." 
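+# (SOURCE_DIR is dev/release/, so SOURCE_TOP_DIR is expected to resolve to the repository root)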
if [ "$#" -ne 2 ]; then echo "Usage: $0 " @@ -30,7 +31,13 @@ fi previous_version=$1 version=$2 -site_dir="${SOURCE_DIR}/../../site" +pushd "${SOURCE_TOP_DIR}" + +branch_name=release-note-${version} +git checkout master +git checkout -b ${branch_name} + +site_dir="${SOURCE_TOP_DIR}/site" release_dir="${site_dir}/_release" announce_file="${release_dir}/${version}.md" versions_yml="${site_dir}/_data/versions.yml" @@ -133,6 +140,7 @@ cat <> "${announce_file}" [5]: https://bintray.com/apache/arrow/ubuntu/${version}/ [6]: https://github.com/apache/arrow/releases/tag/apache-arrow-${version} ANNOUNCE +git add "${announce_file}" # Update index @@ -187,6 +195,8 @@ for md_file in ${announce_files}; do echo "[${i}]: {{ site.baseurl }}/release/${html_file}" >> ${index_file} done +git add ${index_file} + popd @@ -226,3 +236,18 @@ current: sha256: 'https://www.apache.org/dist/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha256' sha512: 'https://www.apache.org/dist/arrow/arrow-${version}/apache-arrow-${version}.tar.gz.sha512' YAML +git add "${versions_yml}" + +git commit -m "[Website] Add release note for ${version}" +git push -u origin ${branch_name} + +github_url=$(git remote get-url origin | \ + sed \ + -e 's,^git@github.com:,https://github.com/,' \ + -e 's,\.git$,,') + +echo "Success!" +echo "1. Open a JIRA issue:" +echo " https://issues.apache.org/jira/projects/ARROW/issues/" +echo "2. Create a pull request:" +echo " ${github_url}/pull/new/${branch_name}" diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 79a1c10483b..85896fce33c 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -44,37 +44,37 @@ dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib-doc.doc-base dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib-doc.install dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib-doc.links -dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib0.install dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib-dev.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib14.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib0.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda0.install dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset-dev.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset0.install dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python-dev.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python14.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python0.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow0.install dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib-doc.doc-base dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib-doc.install dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib-doc.links 
-dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib14.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib0.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva0.install dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib-doc.doc-base dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib-doc.install dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib-doc.links -dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib14.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib0.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet0.install dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib-dev.install dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib-doc.doc-base dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib-doc.install dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib-doc.links -dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib14.install -dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma14.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib0.install +dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma0.install dev/tasks/linux-packages/debian.ubuntu-xenial/patches/series dev/tasks/linux-packages/debian.ubuntu-xenial/plasma-store-server.install dev/tasks/linux-packages/debian.ubuntu-xenial/rules @@ -92,39 +92,39 @@ dev/tasks/linux-packages/debian/libarrow-glib-dev.install dev/tasks/linux-packages/debian/libarrow-glib-doc.doc-base dev/tasks/linux-packages/debian/libarrow-glib-doc.install dev/tasks/linux-packages/debian/libarrow-glib-doc.links -dev/tasks/linux-packages/debian/libarrow-glib14.install +dev/tasks/linux-packages/debian/libarrow-glib0.install dev/tasks/linux-packages/debian/libarrow-cuda-dev.install dev/tasks/linux-packages/debian/libarrow-cuda-glib-dev.install -dev/tasks/linux-packages/debian/libarrow-cuda-glib14.install -dev/tasks/linux-packages/debian/libarrow-cuda14.install +dev/tasks/linux-packages/debian/libarrow-cuda-glib0.install +dev/tasks/linux-packages/debian/libarrow-cuda0.install dev/tasks/linux-packages/debian/libarrow-dataset-dev.install -dev/tasks/linux-packages/debian/libarrow-dataset14.install +dev/tasks/linux-packages/debian/libarrow-dataset0.install dev/tasks/linux-packages/debian/libarrow-flight-dev.install -dev/tasks/linux-packages/debian/libarrow-flight14.install +dev/tasks/linux-packages/debian/libarrow-flight0.install dev/tasks/linux-packages/debian/libarrow-python-dev.install -dev/tasks/linux-packages/debian/libarrow-python14.install -dev/tasks/linux-packages/debian/libarrow14.install +dev/tasks/linux-packages/debian/libarrow-python0.install +dev/tasks/linux-packages/debian/libarrow0.install dev/tasks/linux-packages/debian/libgandiva-dev.install dev/tasks/linux-packages/debian/libgandiva-glib-dev.install dev/tasks/linux-packages/debian/libgandiva-glib-doc.doc-base dev/tasks/linux-packages/debian/libgandiva-glib-doc.install dev/tasks/linux-packages/debian/libgandiva-glib-doc.links -dev/tasks/linux-packages/debian/libgandiva-glib14.install -dev/tasks/linux-packages/debian/libgandiva14.install 
+dev/tasks/linux-packages/debian/libgandiva-glib0.install +dev/tasks/linux-packages/debian/libgandiva0.install dev/tasks/linux-packages/debian/libparquet-dev.install dev/tasks/linux-packages/debian/libparquet-glib-dev.install dev/tasks/linux-packages/debian/libparquet-glib-doc.doc-base dev/tasks/linux-packages/debian/libparquet-glib-doc.install dev/tasks/linux-packages/debian/libparquet-glib-doc.links -dev/tasks/linux-packages/debian/libparquet-glib14.install -dev/tasks/linux-packages/debian/libparquet14.install +dev/tasks/linux-packages/debian/libparquet-glib0.install +dev/tasks/linux-packages/debian/libparquet0.install dev/tasks/linux-packages/debian/libplasma-dev.install dev/tasks/linux-packages/debian/libplasma-glib-dev.install dev/tasks/linux-packages/debian/libplasma-glib-doc.doc-base dev/tasks/linux-packages/debian/libplasma-glib-doc.install dev/tasks/linux-packages/debian/libplasma-glib-doc.links -dev/tasks/linux-packages/debian/libplasma-glib14.install -dev/tasks/linux-packages/debian/libplasma14.install +dev/tasks/linux-packages/debian/libplasma-glib0.install +dev/tasks/linux-packages/debian/libplasma0.install dev/tasks/linux-packages/debian/patches/series dev/tasks/linux-packages/debian/plasma-store-server.install dev/tasks/linux-packages/debian/rules diff --git a/dev/release/source/Dockerfile b/dev/release/source/Dockerfile index 9085cef3327..7d5453b80c4 100644 --- a/dev/release/source/Dockerfile +++ b/dev/release/source/Dockerfile @@ -15,38 +15,17 @@ # specific language governing permissions and limitations # under the License. -FROM ubuntu:18.04 +FROM debian:buster ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends wget software-properties-common gpg-agent && \ - wget --quiet -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ - apt-add-repository -y "deb http://apt.llvm.org/bionic llvm-toolchain-bionic-7 main" && \ - apt-get -y install clang-7 - RUN apt update && \ apt install -y -V \ autoconf-archive \ - bison \ - cmake \ - flex \ - g++ \ - gcc \ gtk-doc-tools \ - libboost-filesystem-dev \ - libboost-regex-dev \ - libboost-system-dev \ libgirepository1.0-dev \ libglib2.0-doc \ - libprotobuf-dev \ - libprotoc-dev \ libtool \ - lsb-release \ - make \ - pkg-config \ - protobuf-compiler && \ + pkg-config && \ apt clean && \ rm -rf /var/lib/apt/lists/* - -COPY build.sh /build.sh diff --git a/dev/release/source/build.sh b/dev/release/source/build.sh index 20c972466b4..558600e1fb7 100755 --- a/dev/release/source/build.sh +++ b/dev/release/source/build.sh @@ -20,35 +20,14 @@ set -e archive_name=$1 -dist_c_glib_tar_gz=$2 +c_glib_including_configure_tar_gz=$2 -tar xf /host/${archive_name}.tar +tar xf /arrow/${archive_name}.tar -# build Apache Arrow C++ before building Apache Arrow GLib because -# Apache Arrow GLib requires Apache Arrow C++. -mkdir -p ${archive_name}/cpp/build -cpp_install_dir=${PWD}/${archive_name}/cpp/install -cd ${archive_name}/cpp/build -cmake .. \ - -DCMAKE_INSTALL_PREFIX=${cpp_install_dir} \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DARROW_PLASMA=yes \ - -DARROW_GANDIVA=yes \ - -DARROW_PARQUET=yes -make -j8 -make install -cd - - -# build source archive for Apache Arrow GLib by "make dist". 
+# Run autogen.sh to create c_glib/ source archive containing the configure script cd ${archive_name}/c_glib ./autogen.sh -./configure \ - PKG_CONFIG_PATH=${cpp_install_dir}/lib/pkgconfig \ - --enable-gtk-doc -LD_LIBRARY_PATH=${cpp_install_dir}/lib make -j8 -make dist -tar xzf *.tar.gz -rm *.tar.gz +rm -rf autom4te.cache cd - -mv ${archive_name}/c_glib/apache-arrow-glib-* c_glib/ -tar czf /host/${dist_c_glib_tar_gz} c_glib +mv ${archive_name}/c_glib/ c_glib/ +tar czf /arrow/${c_glib_including_configure_tar_gz} c_glib diff --git a/dev/release/test-helper.rb b/dev/release/test-helper.rb index 1dad54390cf..a5d3046b7ac 100644 --- a/dev/release/test-helper.rb +++ b/dev/release/test-helper.rb @@ -16,7 +16,11 @@ # under the License. require "English" +require "cgi/util" require "find" +require "json" +require "open-uri" +require "rexml/document" require "tempfile" require "tmpdir" @@ -73,6 +77,7 @@ def detect_versions @snapshot_version = cpp_cmake_lists.read[/ARROW_VERSION "(.+?)"/, 1] @release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "") @next_version = @release_version.gsub(/\A\d+/) {|major| major.succ} + @next_snapshot_version = "#{@next_version}-SNAPSHOT" r_description = top_dir + "r" + "DESCRIPTION" @previous_version = r_description.read[/^Version: (.+?)\.9000$/, 1] end diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index 6cca89c0000..79b4c19056d 100755 --- a/dev/release/verify-apt.sh +++ b/dev/release/verify-apt.sh @@ -126,7 +126,7 @@ fi apt install -y -V libplasma-glib-dev=${deb_version} apt install -y -V libplasma-glib-doc=${deb_version} -# apt install -y -V plasma-store-server=${deb_version} +apt install -y -V plasma-store-server=${deb_version} if [ "${have_gandiva}" = "yes" ]; then apt install -y -V libgandiva-glib-dev=${deb_version} diff --git a/dev/release/verify-release-candidate.bat b/dev/release/verify-release-candidate.bat index 3f6d95c17cf..299297e95c5 100644 --- a/dev/release/verify-release-candidate.bat +++ b/dev/release/verify-release-candidate.bat @@ -75,6 +75,7 @@ cmake -G "%GENERATOR%" ^ -DGTest_SOURCE=BUNDLED ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DARROW_CXXFLAGS="/MP" ^ + -DARROW_FLIGHT=ON ^ -DARROW_PYTHON=ON ^ -DARROW_PARQUET=ON ^ .. 
|| exit /B @@ -94,11 +95,11 @@ ctest -VV || exit /B popd @rem Build and import pyarrow -@rem parquet-cpp has some additional runtime dependencies that we need to figure out -@rem see PARQUET-1018 pushd %ARROW_SOURCE%\python -python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp bdist_wheel || exit /B +set PYARROW_WITH_FLIGHT=1 +set PYARROW_WITH_PARQUET=1 +python setup.py build_ext --inplace --bundle-arrow-cpp bdist_wheel || exit /B py.test pyarrow -v -s --parquet || exit /B popd diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 159d7dde7eb..0acb56e4d8a 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -46,7 +46,6 @@ case $# in esac set -e -set -u set -x set -o pipefail @@ -130,9 +129,8 @@ download_bintray_files() { --fail \ --location \ --output ${file} \ - https://dl.bintray.com/${BINTRAY_REPOSITORY}/${file} & + https://dl.bintray.com/${BINTRAY_REPOSITORY}/${file} done - wait } test_binary() { @@ -167,7 +165,6 @@ test_binary() { test_apt() { for target in debian-stretch \ debian-buster \ - ubuntu-trusty \ ubuntu-xenial \ ubuntu-bionic \ ubuntu-cosmic \ @@ -458,6 +455,14 @@ test_rust() { # we are targeting Rust nightly for releases rustup default nightly + # use local modules because we don't publish modules to crates.io yet + sed \ + -i.bak \ + -E \ + -e 's/^arrow = "([^"]*)"/arrow = { version = "\1", path = "..\/arrow" }/g' \ + -e 's/^parquet = "([^"]*)"/parquet = { version = "\1", path = "..\/parquet" }/g' \ + */Cargo.toml + # raises on any warnings RUSTFLAGS="-D warnings" cargo build cargo test @@ -481,7 +486,10 @@ test_integration() { INTEGRATION_TEST_ARGS=--run_flight fi - python integration_test.py $INTEGRATION_TEST_ARGS + # Flight integration test executables have a runtime dependency on + # release/libgtest.so + LD_LIBRARY_PATH=$ARROW_CPP_EXE_PATH:$LD_LIBRARY_PATH \ + python integration_test.py $INTEGRATION_TEST_ARGS popd } diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index 3fd43f3c859..4638980f128 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -39,6 +39,7 @@ requirements: - re2 - snappy - thrift-cpp >=0.11 + - uriparser - zlib - zstd @@ -55,6 +56,7 @@ requirements: - python - re2 - snappy + - uriparser - zlib - zstd diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/changelog b/dev/tasks/linux-packages/debian.ubuntu-xenial/changelog index e7fce68b432..e9263f67c72 100644 --- a/dev/tasks/linux-packages/debian.ubuntu-xenial/changelog +++ b/dev/tasks/linux-packages/debian.ubuntu-xenial/changelog @@ -1,3 +1,9 @@ +apache-arrow (0.14.1-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Tue, 16 Jul 2019 20:35:49 -0000 + apache-arrow (0.14.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/control b/dev/tasks/linux-packages/debian.ubuntu-xenial/control index d95d6bac6a7..f1a46b6386e 100644 --- a/dev/tasks/linux-packages/debian.ubuntu-xenial/control +++ b/dev/tasks/linux-packages/debian.ubuntu-xenial/control @@ -36,7 +36,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.6 Homepage: https://arrow.apache.org/ -Package: libarrow14 +Package: libarrow0 Section: libs Architecture: any Multi-Arch: same @@ -48,7 +48,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files.
-Package: libarrow-cuda14 +Package: libarrow-cuda0 Section: libs Architecture: any Multi-Arch: same @@ -56,12 +56,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for CUDA support. -Package: libarrow-dataset14 +Package: libarrow-dataset0 Section: libs Architecture: any Multi-Arch: same @@ -69,12 +69,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Datasets module. -Package: libarrow-python14 +Package: libarrow-python0 Section: libs Architecture: any Multi-Arch: same @@ -82,7 +82,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}), + libarrow0 (= ${binary:Version}), python3, python3-numpy Description: Apache Arrow is a data processing library for analysis @@ -95,7 +95,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files. @@ -107,7 +107,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-cuda14 (= ${binary:Version}) + libarrow-cuda0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for CUDA support. @@ -119,7 +119,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-dataset14 (= ${binary:Version}) + libarrow-dataset0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Datasets module. @@ -131,12 +131,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-python14 (= ${binary:Version}) + libarrow-python0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Python support. -Package: libgandiva14 +Package: libgandiva0 Section: libs Architecture: any Multi-Arch: same @@ -144,7 +144,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -157,13 +157,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libgandiva14 (= ${binary:Version}) + libgandiva0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . This package provides C++ header files. -Package: libplasma14 +Package: libplasma0 Section: libs Architecture: any Multi-Arch: same @@ -171,7 +171,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-cuda14 (= ${binary:Version}) + libarrow-cuda0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides C++ library files to connect plasma_store_server. 
@@ -183,7 +183,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libplasma14 (= ${binary:Version}) + libplasma0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides plasma_store_server. @@ -195,12 +195,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), - libplasma14 (= ${binary:Version}) + libplasma0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides C++ header files. -Package: libparquet14 +Package: libparquet0 Section: libs Architecture: any Multi-Arch: same @@ -219,12 +219,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet14 (= ${binary:Version}) + libparquet0 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib14 +Package: libarrow-glib0 Section: libs Architecture: any Multi-Arch: same @@ -232,7 +232,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files. @@ -256,7 +256,7 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-dev (= ${binary:Version}), - libarrow-glib14 (= ${binary:Version}), + libarrow-glib0 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis @@ -274,7 +274,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libarrow-cuda-glib14 +Package: libarrow-cuda-glib0 Section: libs Architecture: any Multi-Arch: same @@ -282,8 +282,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib14 (= ${binary:Version}), - libarrow-cuda14 (= ${binary:Version}) + libarrow-glib0 (= ${binary:Version}), + libarrow-cuda0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for CUDA support. @@ -307,13 +307,13 @@ Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-cuda-glib14 (= ${binary:Version}), + libarrow-cuda-glib0 (= ${binary:Version}), gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based header files for CUDA support. -Package: libgandiva-glib14 +Package: libgandiva-glib0 Section: libs Architecture: any Multi-Arch: same @@ -321,8 +321,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib14 (= ${binary:Version}), - libgandiva14 (= ${binary:Version}) + libarrow-glib0 (= ${binary:Version}), + libgandiva0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -348,7 +348,7 @@ Depends: ${misc:Depends}, libgandiva-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libgandiva-glib14 (= ${binary:Version}), + libgandiva-glib0 (= ${binary:Version}), gir1.2-gandiva-1.0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. 
@@ -367,7 +367,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions . This package provides documentations. -Package: libplasma-glib14 +Package: libplasma-glib0 Section: libs Architecture: any Multi-Arch: same @@ -375,8 +375,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-cuda-glib14 (= ${binary:Version}), - libplasma14 (= ${binary:Version}) + libarrow-cuda-glib0 (= ${binary:Version}), + libplasma0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides GLib based library files to connect plasma_store_server. @@ -400,7 +400,7 @@ Depends: ${misc:Depends}, libplasma-dev (= ${binary:Version}), libarrow-cuda-glib-dev (= ${binary:Version}), - libplasma-glib14 (= ${binary:Version}), + libplasma-glib0 (= ${binary:Version}), gir1.2-plasma-1.0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . @@ -417,7 +417,7 @@ Description: Plasma is an in-memory object store and cache for big data. . This package provides documentations. -Package: libparquet-glib14 +Package: libparquet-glib0 Section: libs Architecture: any Multi-Arch: same @@ -425,8 +425,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib14 (= ${binary:Version}), - libparquet14 (= ${binary:Version}) + libarrow-glib0 (= ${binary:Version}), + libparquet0 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. @@ -450,7 +450,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib14 (= ${binary:Version}), + libparquet-glib0 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda-glib0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-cuda0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-dataset0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-glib0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python0.install similarity index 100% rename from 
dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow-python0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libarrow0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva-glib0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libgandiva0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet-glib0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libparquet0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma-glib0.install diff --git a/dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma14.install b/dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma0.install similarity index 100% rename from dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma14.install rename to dev/tasks/linux-packages/debian.ubuntu-xenial/libplasma0.install diff --git a/dev/tasks/linux-packages/debian/changelog b/dev/tasks/linux-packages/debian/changelog index e7fce68b432..e9263f67c72 100644 --- a/dev/tasks/linux-packages/debian/changelog +++ b/dev/tasks/linux-packages/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (0.14.1-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs <szucs.krisztian@gmail.com> Tue, 16 Jul 2019 20:35:49 -0000 + apache-arrow (0.14.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/debian/control b/dev/tasks/linux-packages/debian/control index a4170879ca0..9b5d81d1f70 100644 --- a/dev/tasks/linux-packages/debian/control +++ b/dev/tasks/linux-packages/debian/control @@ -34,7 +34,7 @@ Build-Depends-Indep: libglib2.0-doc Standards-Version: 3.9.6 Homepage: https://arrow.apache.org/ -Package: libarrow14 +Package: libarrow0 Section: libs Architecture: any Multi-Arch: same @@ -46,7 +46,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files.
-Package: libarrow-cuda14 +Package: libarrow-cuda0 Section: libs Architecture: i386 amd64 Multi-Arch: same @@ -54,12 +54,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for CUDA support. -Package: libarrow-dataset14 +Package: libarrow-dataset0 Section: libs Architecture: any Multi-Arch: same @@ -67,12 +67,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Datasets module. -Package: libarrow-flight14 +Package: libarrow-flight0 Section: libs Architecture: any Multi-Arch: same @@ -80,12 +80,12 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight RPC system. -Package: libarrow-python14 +Package: libarrow-python0 Section: libs Architecture: any Multi-Arch: same @@ -93,7 +93,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}), + libarrow0 (= ${binary:Version}), python3, python3-numpy Description: Apache Arrow is a data processing library for analysis @@ -106,7 +106,7 @@ Architecture: any Multi-Arch: same Depends: ${misc:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files. @@ -118,7 +118,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-cuda14 (= ${binary:Version}) + libarrow-cuda0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for CUDA support. @@ -130,7 +130,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-dataset14 (= ${binary:Version}) + libarrow-dataset0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Datasets module. @@ -142,7 +142,7 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-flight14 (= ${binary:Version}) + libarrow-flight0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight RPC system. @@ -154,12 +154,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libarrow-python14 (= ${binary:Version}) + libarrow-python0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Python support. -Package: libgandiva14 +Package: libgandiva0 Section: libs Architecture: i386 amd64 Multi-Arch: same @@ -167,7 +167,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . 
@@ -180,13 +180,13 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libgandiva14 (= ${binary:Version}) + libgandiva0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . This package provides C++ header files. -Package: libplasma14 +Package: libplasma0 Section: libs Architecture: i386 amd64 Multi-Arch: same @@ -194,7 +194,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-cuda14 (= ${binary:Version}) + libarrow-cuda0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides C++ library files to connect plasma_store_server. @@ -206,7 +206,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libplasma14 (= ${binary:Version}) + libplasma0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides plasma_store_server. @@ -218,12 +218,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), - libplasma14 (= ${binary:Version}) + libplasma0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides C++ header files. -Package: libparquet14 +Package: libparquet0 Section: libs Architecture: any Multi-Arch: same @@ -242,12 +242,12 @@ Multi-Arch: same Depends: ${misc:Depends}, libarrow-dev (= ${binary:Version}), - libparquet14 (= ${binary:Version}) + libparquet0 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides C++ header files. -Package: libarrow-glib14 +Package: libarrow-glib0 Section: libs Architecture: any Multi-Arch: same @@ -255,7 +255,7 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow14 (= ${binary:Version}) + libarrow0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files. @@ -279,7 +279,7 @@ Depends: ${misc:Depends}, libglib2.0-dev, libarrow-dev (= ${binary:Version}), - libarrow-glib14 (= ${binary:Version}), + libarrow-glib0 (= ${binary:Version}), gir1.2-arrow-1.0 (= ${binary:Version}) Suggests: libarrow-glib-doc Description: Apache Arrow is a data processing library for analysis @@ -297,7 +297,7 @@ Description: Apache Arrow is a data processing library for analysis . This package provides documentations. -Package: libarrow-cuda-glib14 +Package: libarrow-cuda-glib0 Section: libs Architecture: i386 amd64 Multi-Arch: same @@ -305,8 +305,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib14 (= ${binary:Version}), - libarrow-cuda14 (= ${binary:Version}) + libarrow-glib0 (= ${binary:Version}), + libarrow-cuda0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based library files for CUDA support. @@ -330,13 +330,13 @@ Depends: ${misc:Depends}, libarrow-cuda-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libarrow-cuda-glib14 (= ${binary:Version}), + libarrow-cuda-glib0 (= ${binary:Version}), gir1.2-arrow-cuda-1.0 (= ${binary:Version}) Description: Apache Arrow is a data processing library for analysis . This package provides GLib based header files for CUDA support. 
-Package: libgandiva-glib14 +Package: libgandiva-glib0 Section: libs Architecture: i386 amd64 Multi-Arch: same @@ -344,8 +344,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib14 (= ${binary:Version}), - libgandiva14 (= ${binary:Version}) + libarrow-glib0 (= ${binary:Version}), + libgandiva0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. . @@ -371,7 +371,7 @@ Depends: ${misc:Depends}, libgandiva-dev (= ${binary:Version}), libarrow-glib-dev (= ${binary:Version}), - libgandiva-glib14 (= ${binary:Version}), + libgandiva-glib0 (= ${binary:Version}), gir1.2-gandiva-1.0 (= ${binary:Version}) Description: Gandiva is a toolset for compiling and evaluating expressions on Arrow Data. @@ -390,7 +390,7 @@ Description: Gandiva is a toolset for compiling and evaluating expressions . This package provides documentations. -Package: libplasma-glib14 +Package: libplasma-glib0 Section: libs Architecture: i386 amd64 Multi-Arch: same @@ -398,8 +398,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-cuda-glib14 (= ${binary:Version}), - libplasma14 (= ${binary:Version}) + libarrow-cuda-glib0 (= ${binary:Version}), + libplasma0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . This package provides GLib based library files to connect plasma_store_server. @@ -423,7 +423,7 @@ Depends: ${misc:Depends}, libplasma-dev (= ${binary:Version}), libarrow-cuda-glib-dev (= ${binary:Version}), - libplasma-glib14 (= ${binary:Version}), + libplasma-glib0 (= ${binary:Version}), gir1.2-plasma-1.0 (= ${binary:Version}) Description: Plasma is an in-memory object store and cache for big data. . @@ -440,7 +440,7 @@ Description: Plasma is an in-memory object store and cache for big data. . This package provides documentations. -Package: libparquet-glib14 +Package: libparquet-glib0 Section: libs Architecture: any Multi-Arch: same @@ -448,8 +448,8 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends}, - libarrow-glib14 (= ${binary:Version}), - libparquet14 (= ${binary:Version}) + libarrow-glib0 (= ${binary:Version}), + libparquet0 (= ${binary:Version}) Description: Apache Parquet is a columnar storage format . This package provides GLib based library files. 
@@ -473,7 +473,7 @@ Depends: ${misc:Depends}, libarrow-glib-dev (= ${binary:Version}), libparquet-dev (= ${binary:Version}), - libparquet-glib14 (= ${binary:Version}), + libparquet-glib0 (= ${binary:Version}), gir1.2-parquet-1.0 (= ${binary:Version}) Suggests: libparquet-glib-doc Description: Apache Parquet is a columnar storage format diff --git a/dev/tasks/linux-packages/debian/libarrow-cuda-glib14.install b/dev/tasks/linux-packages/debian/libarrow-cuda-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-cuda-glib14.install rename to dev/tasks/linux-packages/debian/libarrow-cuda-glib0.install diff --git a/dev/tasks/linux-packages/debian/libarrow-cuda14.install b/dev/tasks/linux-packages/debian/libarrow-cuda0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-cuda14.install rename to dev/tasks/linux-packages/debian/libarrow-cuda0.install diff --git a/dev/tasks/linux-packages/debian/libarrow-dataset14.install b/dev/tasks/linux-packages/debian/libarrow-dataset0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-dataset14.install rename to dev/tasks/linux-packages/debian/libarrow-dataset0.install diff --git a/dev/tasks/linux-packages/debian/libarrow-flight14.install b/dev/tasks/linux-packages/debian/libarrow-flight0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-flight14.install rename to dev/tasks/linux-packages/debian/libarrow-flight0.install diff --git a/dev/tasks/linux-packages/debian/libarrow-glib14.install b/dev/tasks/linux-packages/debian/libarrow-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-glib14.install rename to dev/tasks/linux-packages/debian/libarrow-glib0.install diff --git a/dev/tasks/linux-packages/debian/libarrow-python14.install b/dev/tasks/linux-packages/debian/libarrow-python0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow-python14.install rename to dev/tasks/linux-packages/debian/libarrow-python0.install diff --git a/dev/tasks/linux-packages/debian/libarrow14.install b/dev/tasks/linux-packages/debian/libarrow0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libarrow14.install rename to dev/tasks/linux-packages/debian/libarrow0.install diff --git a/dev/tasks/linux-packages/debian/libgandiva-glib14.install b/dev/tasks/linux-packages/debian/libgandiva-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libgandiva-glib14.install rename to dev/tasks/linux-packages/debian/libgandiva-glib0.install diff --git a/dev/tasks/linux-packages/debian/libgandiva14.install b/dev/tasks/linux-packages/debian/libgandiva0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libgandiva14.install rename to dev/tasks/linux-packages/debian/libgandiva0.install diff --git a/dev/tasks/linux-packages/debian/libparquet-glib14.install b/dev/tasks/linux-packages/debian/libparquet-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libparquet-glib14.install rename to dev/tasks/linux-packages/debian/libparquet-glib0.install diff --git a/dev/tasks/linux-packages/debian/libparquet14.install b/dev/tasks/linux-packages/debian/libparquet0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libparquet14.install rename to dev/tasks/linux-packages/debian/libparquet0.install diff --git a/dev/tasks/linux-packages/debian/libplasma-glib14.install 
b/dev/tasks/linux-packages/debian/libplasma-glib0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libplasma-glib14.install rename to dev/tasks/linux-packages/debian/libplasma-glib0.install diff --git a/dev/tasks/linux-packages/debian/libplasma14.install b/dev/tasks/linux-packages/debian/libplasma0.install similarity index 100% rename from dev/tasks/linux-packages/debian/libplasma14.install rename to dev/tasks/linux-packages/debian/libplasma0.install diff --git a/dev/tasks/linux-packages/yum/arrow.spec.in b/dev/tasks/linux-packages/yum/arrow.spec.in index 740699966ba..0c5e8912fd8 100644 --- a/dev/tasks/linux-packages/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/yum/arrow.spec.in @@ -449,6 +449,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Tue Jul 16 2019 Krisztián Szűcs - 0.14.1-1 +- New upstream release. + * Fri Jun 28 2019 Sutou Kouhei - 0.14.0-1 - New upstream release. diff --git a/dev/tasks/python-wheels/manylinux-test.sh b/dev/tasks/python-wheels/manylinux-test.sh index f5056fb84ae..a0735f3d2da 100755 --- a/dev/tasks/python-wheels/manylinux-test.sh +++ b/dev/tasks/python-wheels/manylinux-test.sh @@ -19,13 +19,23 @@ set -e +export ARROW_TEST_DATA=/arrow/testing/data + +python --version # Install built wheel pip install -q /arrow/python/$WHEEL_TAG/dist/*.whl +# Install test dependencies (pip won't work after removing system zlib) +pip install -q -r /arrow/python/requirements-test.txt +# Run pyarrow tests +pytest -rs --pyargs pyarrow -# Runs tests on installed distribution from an empty directory -python --version +if [[ "$1" == "--remove-system-libs" ]]; then + # Run import tests after removing the bundled dependencies from the system + echo "Removing the following libraries to fail loudly if they are bundled incorrectly:" + ldconfig -p | grep "lib\(lz4\|z\|boost\)" | awk -F'> ' '{print $2}' | xargs rm -v -f +fi -# Test optional dependencies +# Test import and optional dependencies python -c " import sys import pyarrow @@ -37,9 +47,3 @@ if sys.version_info.major > 2: import pyarrow.flight import pyarrow.gandiva " - -export ARROW_TEST_DATA=/arrow/testing/data - -# Run pyarrow tests -pip install -q -r /arrow/python/requirements-test.txt -pytest -v --pyargs pyarrow diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh index b299646add7..3dd3ccb2abc 100755 --- a/dev/tasks/python-wheels/osx-build.sh +++ b/dev/tasks/python-wheels/osx-build.sh @@ -137,7 +137,9 @@ function build_wheel { -DARROW_FLIGHT=ON \ -DgRPC_SOURCE=SYSTEM \ -Dc-ares_SOURCE=BUNDLED \ + -Dzlib_SOURCE=BUNDLED \ -DARROW_PROTOBUF_USE_SHARED=OFF \ + -DOPENSSL_USE_STATIC_LIBS=ON \ -DMAKE=make \ .. 
make -j5 @@ -172,21 +174,32 @@ function build_wheel { popd } -# overrides multibuild's default install_run -function install_run { +function install_wheel { multibuild_dir=`realpath $MULTIBUILD_DIR` pushd $1 # enter arrow's directory - wheelhouse="$PWD/python/dist" # Install compatible wheel pip install $(pip_opts) \ $(python $multibuild_dir/supported_wheels.py $wheelhouse/*.whl) - # Runs tests on installed distribution from an empty directory - python --version + popd +} + +function run_unit_tests { + pushd $1 + + # Install test dependencies + pip install $(pip_opts) -r python/requirements-test.txt + + # Run pyarrow tests + pytest -rs --pyargs pyarrow + + popd +} +function run_import_tests { # Test optional dependencies python -c " import sys @@ -199,11 +212,4 @@ if sys.version_info.major > 2: import pyarrow.flight import pyarrow.gandiva " - - # Run pyarrow tests - pip install $(pip_opts) -r python/requirements-test.txt - - py.test --pyargs pyarrow - - popd } diff --git a/dev/tasks/python-wheels/travis.manylinux.yml b/dev/tasks/python-wheels/travis.linux.yml similarity index 79% rename from dev/tasks/python-wheels/travis.manylinux.yml rename to dev/tasks/python-wheels/travis.linux.yml index e3670e43198..13121a484d6 100644 --- a/dev/tasks/python-wheels/travis.manylinux.yml +++ b/dev/tasks/python-wheels/travis.linux.yml @@ -58,16 +58,24 @@ script: -e PYTHON_VERSION="{{ python_version }}" -e UNICODE_WIDTH="{{ unicode_width }}" $BUILD_IMAGE - - popd + + # run auditwheel, it does always exit with 0 so it is mostly for debugging + # purposes + - docker run -v `pwd`:/arrow quay.io/pypa/{{ wheel_tag }}_x86_64 /bin/bash -c + "auditwheel show /arrow/python/{{ wheel_tag }}/dist/*.whl" # test on multiple distributions {%- for image in test_docker_images %} - - docker run -it --shm-size 2G --volume $(pwd)/arrow:/arrow - --env WHEEL_TAG="{{ wheel_tag }}" - {{ image }} - /arrow/dev/tasks/python-wheels/manylinux-test.sh + - docker run -it --shm-size 2G -v `pwd`:/arrow -e WHEEL_TAG="{{ wheel_tag }}" + {%- if test_remove_system_libs %} + {{ image }} /arrow/dev/tasks/python-wheels/manylinux-test.sh --remove-system-libs + {%- else %} + {{ image }} /arrow/dev/tasks/python-wheels/manylinux-test.sh + {%- endif %} {%- endfor %} + - popd + # prepare for deployment - sudo mv arrow/python/{{ wheel_tag }}/dist/* dist/ diff --git a/dev/tasks/python-wheels/travis.osx.yml b/dev/tasks/python-wheels/travis.osx.yml index 980d122305f..85026426612 100644 --- a/dev/tasks/python-wheels/travis.osx.yml +++ b/dev/tasks/python-wheels/travis.osx.yml @@ -47,7 +47,7 @@ before_install: - brew uninstall boost cgal postgis sfcgal - brew update - brew upgrade cmake - - brew install bison flex grpc openssl llvm@7 + - brew install bison flex grpc openssl llvm@7 zlib # Remove shared grpc libraries installed by brew to make sure # we are linked against the static ones. 
- rm -f /usr/local/opt/grpc/lib/*.dylib @@ -68,11 +68,20 @@ install: # test the built wheels, remove llvm and grpc dependencies to ensure # things are properly statically-linked - - brew uninstall llvm@7 grpc c-ares - - install_run arrow + - brew uninstall --ignore-dependencies llvm@7 grpc c-ares openssl zlib + # install the built wheel and test dependencies + - install_wheel arrow + # run unit tests before removing the system libraries + - run_unit_tests arrow + # remove libz to ensure that it is properly bundled + - sudo find /usr -name libz.* -delete + # run the import tests + - run_import_tests # move built wheels to a top level directory - mv -v arrow/python/dist/* dist/ + # reinstall openssl because travis' deployment script depends on it + - brew install openssl deploy: provider: releases diff --git a/dev/tasks/python-wheels/win-build.bat b/dev/tasks/python-wheels/win-build.bat index 56b009705f9..14b5bb50be7 100644 --- a/dev/tasks/python-wheels/win-build.bat +++ b/dev/tasks/python-wheels/win-build.bat @@ -53,6 +53,8 @@ cmake -G "%GENERATOR%" ^ -DARROW_PYTHON=ON ^ -DARROW_PARQUET=ON ^ -DARROW_GANDIVA=ON ^ + -Duriparser_SOURCE=BUNDLED ^ + -Dzlib_SOURCE=BUNDLED ^ .. || exit /B cmake --build . --target install --config Release || exit /B popd @@ -79,7 +81,7 @@ set ARROW_TEST_DATA=%ARROW_SRC%\testing\data @rem test the wheel @rem TODO For maximum reliability, we should test in a plain virtualenv instead. call conda create -n wheel-test -q -y python=%PYTHON_VERSION% ^ - numpy=%NUMPY_VERSION% pandas pytest hypothesis || exit /B + numpy=%NUMPY_VERSION% pandas cython pytest hypothesis || exit /B call activate wheel-test @rem install the built wheel @@ -89,4 +91,4 @@ pip install -vv --no-index --find-links=%ARROW_SRC%\python\dist\ pyarrow || exit python -c "import pyarrow; import pyarrow.parquet; import pyarrow.flight; import pyarrow.gandiva;" || exit /B @rem run the python tests -pytest --pyargs pyarrow || exit /B +pytest -rs --pyargs pyarrow || exit /B diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 6d0640a058d..9c7cd638a7c 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -161,128 +161,138 @@ tasks: wheel-manylinux1-cp27m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 2.7 unicode_width: 16 wheel_tag: manylinux1 test_docker_images: [] + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp27-cp27m-manylinux1_x86_64.whl wheel-manylinux1-cp27mu: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 2.7 unicode_width: 32 wheel_tag: manylinux1 test_docker_images: - - python:2.7-slim # debian ucs4 + - python:2.7 # debian ucs4 + test_remove_system_libs: false artifacts: - pyarrow-{no_rc_version}-cp27-cp27mu-manylinux1_x86_64.whl wheel-manylinux1-cp35m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 3.5 unicode_width: 16 wheel_tag: manylinux1 test_docker_images: - - python:3.5-slim + - python:3.5 + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp35-cp35m-manylinux1_x86_64.whl wheel-manylinux1-cp36m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 3.6 unicode_width: 16 wheel_tag: manylinux1 test_docker_images: - - python:3.6-slim + - python:3.6 + test_remove_system_libs: true 
artifacts: - pyarrow-{no_rc_version}-cp36-cp36m-manylinux1_x86_64.whl wheel-manylinux1-cp37m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 3.7 unicode_width: 16 wheel_tag: manylinux1 test_docker_images: - - python:3.7-slim + - python:3.7 + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp37-cp37m-manylinux1_x86_64.whl wheel-manylinux2010-cp27m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 2.7 unicode_width: 16 wheel_tag: manylinux2010 test_docker_images: [] + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp27-cp27m-manylinux2010_x86_64.whl wheel-manylinux2010-cp27mu: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 2.7 unicode_width: 32 wheel_tag: manylinux2010 test_docker_images: - - python:2.7-slim # debian ucs4 + - python:2.7 # debian ucs4 + test_remove_system_libs: false artifacts: - pyarrow-{no_rc_version}-cp27-cp27mu-manylinux2010_x86_64.whl wheel-manylinux2010-cp35m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 3.5 unicode_width: 16 wheel_tag: manylinux2010 test_docker_images: - - python:3.5-slim + - python:3.5 + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp35-cp35m-manylinux2010_x86_64.whl wheel-manylinux2010-cp36m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 3.6 unicode_width: 16 wheel_tag: manylinux2010 test_docker_images: - - python:3.6-slim + - python:3.6 + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp36-cp36m-manylinux2010_x86_64.whl wheel-manylinux2010-cp37m: ci: travis platform: linux - template: python-wheels/travis.manylinux.yml + template: python-wheels/travis.linux.yml params: python_version: 3.7 unicode_width: 16 wheel_tag: manylinux2010 test_docker_images: - - python:3.7-slim + - python:3.7 + test_remove_system_libs: true artifacts: - pyarrow-{no_rc_version}-cp37-cp37m-manylinux2010_x86_64.whl @@ -378,28 +388,28 @@ tasks: - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14_{no_rc_version}-1_amd64.deb - - libarrow-cuda14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-cuda14_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0_{no_rc_version}-1_amd64.deb + - libarrow-cuda0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-cuda0_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-flight-dev_{no_rc_version}-1_amd64.deb - - 
libarrow-flight14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-flight14_{no_rc_version}-1_amd64.deb + - libarrow-flight0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-flight0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libgandiva-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb - - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib-doc_{no_rc_version}-1_amd64.deb - libgandiva-glib14-dbgsym_{no_rc_version}-1_amd64.deb - libgandiva-glib14_{no_rc_version}-1_amd64.deb - libgandiva14-dbgsym_{no_rc_version}-1_amd64.deb @@ -407,13 +417,13 @@ tasks: - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb - libplasma-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-dev_{no_rc_version}-1_amd64.deb - - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib-doc_{no_rc_version}-1_amd64.deb - libplasma-glib14-dbgsym_{no_rc_version}-1_amd64.deb - libplasma-glib14_{no_rc_version}-1_amd64.deb - libplasma14-dbgsym_{no_rc_version}-1_amd64.deb @@ -441,27 +451,27 @@ tasks: - gir1.2-parquet-1.0_{no_rc_version}-1_amd64.deb - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-flight-dev_{no_rc_version}-1_amd64.deb - - libarrow-flight14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-flight14_{no_rc_version}-1_amd64.deb + - libarrow-flight0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-flight0_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - 
libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb debian-buster: ci: travis @@ -486,28 +496,28 @@ tasks: - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14_{no_rc_version}-1_amd64.deb - - libarrow-cuda14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-cuda14_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0_{no_rc_version}-1_amd64.deb + - libarrow-cuda0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-cuda0_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-flight-dev_{no_rc_version}-1_amd64.deb - - libarrow-flight14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-flight14_{no_rc_version}-1_amd64.deb + - libarrow-flight0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-flight0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libgandiva-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb - - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib-doc_{no_rc_version}-1_amd64.deb - libgandiva-glib14-dbgsym_{no_rc_version}-1_amd64.deb - libgandiva-glib14_{no_rc_version}-1_amd64.deb - libgandiva14-dbgsym_{no_rc_version}-1_amd64.deb @@ -515,13 +525,13 @@ tasks: - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14-dbgsym_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0-dbgsym_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb - libplasma-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-dev_{no_rc_version}-1_amd64.deb - - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib-doc_{no_rc_version}-1_amd64.deb - libplasma-glib14-dbgsym_{no_rc_version}-1_amd64.deb - libplasma-glib14_{no_rc_version}-1_amd64.deb - libplasma14-dbgsym_{no_rc_version}-1_amd64.deb @@ -552,32 +562,32 @@ tasks: - 
libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14_{no_rc_version}-1_amd64.deb - - libarrow-cuda14_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0_{no_rc_version}-1_amd64.deb + - libarrow-cuda0_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libgandiva-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-doc_{no_rc_version}-1_all.deb - - libgandiva-glib14_{no_rc_version}-1_amd64.deb - - libgandiva14_{no_rc_version}-1_amd64.deb + - libgandiva-glib0_{no_rc_version}-1_amd64.deb + - libgandiva0_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb - libplasma-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-doc_{no_rc_version}-1_all.deb - - libplasma-glib14_{no_rc_version}-1_amd64.deb - - libplasma14_{no_rc_version}-1_amd64.deb + - libplasma-glib0_{no_rc_version}-1_amd64.deb + - libplasma0_{no_rc_version}-1_amd64.deb - plasma-store-server-dbgsym_{no_rc_version}-1_amd64.deb - plasma-store-server_{no_rc_version}-1_amd64.deb @@ -604,33 +614,33 @@ tasks: - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14_{no_rc_version}-1_amd64.deb - - libarrow-cuda14_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0_{no_rc_version}-1_amd64.deb + - libarrow-cuda0_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-flight-dev_{no_rc_version}-1_amd64.deb - - libarrow-flight14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-flight14_{no_rc_version}-1_amd64.deb + - libarrow-flight0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-flight0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libgandiva-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb - - 
libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib-doc_{no_rc_version}-1_amd64.deb - libgandiva-glib14_{no_rc_version}-1_amd64.deb - libgandiva14_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb - libplasma-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-dev_{no_rc_version}-1_amd64.deb - - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib-doc_{no_rc_version}-1_amd64.deb - libplasma-glib14_{no_rc_version}-1_amd64.deb - libplasma14_{no_rc_version}-1_amd64.deb - plasma-store-server-dbgsym_{no_rc_version}-1_amd64.deb @@ -659,33 +669,33 @@ tasks: - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb - - libarrow-cuda-glib14_{no_rc_version}-1_amd64.deb - - libarrow-cuda14_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0_{no_rc_version}-1_amd64.deb + - libarrow-cuda0_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-flight-dev_{no_rc_version}-1_amd64.deb - - libarrow-flight14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-flight14_{no_rc_version}-1_amd64.deb + - libarrow-flight0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-flight0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libgandiva-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb - - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib-doc_{no_rc_version}-1_amd64.deb - libgandiva-glib14_{no_rc_version}-1_amd64.deb - libgandiva14_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb - libplasma-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-dev_{no_rc_version}-1_amd64.deb - - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib-doc_{no_rc_version}-1_amd64.deb - libplasma-glib14_{no_rc_version}-1_amd64.deb - libplasma14_{no_rc_version}-1_amd64.deb - plasma-store-server-dbgsym_{no_rc_version}-1_amd64.deb @@ -714,33 +724,33 @@ tasks: - libarrow-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-dev_{no_rc_version}-1_amd64.deb - libarrow-glib-doc_{no_rc_version}-1_all.deb - - libarrow-glib14_{no_rc_version}-1_amd64.deb + - libarrow-glib0_{no_rc_version}-1_amd64.deb - libarrow-cuda-dev_{no_rc_version}-1_amd64.deb - libarrow-cuda-glib-dev_{no_rc_version}-1_amd64.deb - - 
libarrow-cuda-glib14_{no_rc_version}-1_amd64.deb - - libarrow-cuda14_{no_rc_version}-1_amd64.deb + - libarrow-cuda-glib0_{no_rc_version}-1_amd64.deb + - libarrow-cuda0_{no_rc_version}-1_amd64.deb - libarrow-dataset-dev_{no_rc_version}-1_amd64.deb - - libarrow-dataset14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-dataset14_{no_rc_version}-1_amd64.deb + - libarrow-dataset0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-dataset0_{no_rc_version}-1_amd64.deb - libarrow-flight-dev_{no_rc_version}-1_amd64.deb - - libarrow-flight14-dbgsym_{no_rc_version}-1_amd64.deb - - libarrow-flight14_{no_rc_version}-1_amd64.deb + - libarrow-flight0-dbgsym_{no_rc_version}-1_amd64.deb + - libarrow-flight0_{no_rc_version}-1_amd64.deb - libarrow-python-dev_{no_rc_version}-1_amd64.deb - - libarrow-python14_{no_rc_version}-1_amd64.deb - - libarrow14_{no_rc_version}-1_amd64.deb + - libarrow-python0_{no_rc_version}-1_amd64.deb + - libarrow0_{no_rc_version}-1_amd64.deb - libgandiva-dev_{no_rc_version}-1_amd64.deb - libgandiva-glib-dev_{no_rc_version}-1_amd64.deb - - libgandiva-glib-doc_{no_rc_version}-1_all.deb + - libgandiva-glib-doc_{no_rc_version}-1_amd64.deb - libgandiva-glib14_{no_rc_version}-1_amd64.deb - libgandiva14_{no_rc_version}-1_amd64.deb - libparquet-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-dev_{no_rc_version}-1_amd64.deb - libparquet-glib-doc_{no_rc_version}-1_all.deb - - libparquet-glib14_{no_rc_version}-1_amd64.deb - - libparquet14_{no_rc_version}-1_amd64.deb + - libparquet-glib0_{no_rc_version}-1_amd64.deb + - libparquet0_{no_rc_version}-1_amd64.deb - libplasma-dev_{no_rc_version}-1_amd64.deb - libplasma-glib-dev_{no_rc_version}-1_amd64.deb - - libplasma-glib-doc_{no_rc_version}-1_all.deb + - libplasma-glib-doc_{no_rc_version}-1_amd64.deb - libplasma-glib14_{no_rc_version}-1_amd64.deb - libplasma14_{no_rc_version}-1_amd64.deb - plasma-store-server-dbgsym_{no_rc_version}-1_amd64.deb diff --git a/dev/tasks/tests.yml b/dev/tasks/tests.yml index 2c424fa007a..96744377912 100644 --- a/dev/tasks/tests.yml +++ b/dev/tasks/tests.yml @@ -20,11 +20,13 @@ groups: # makes it easier to submit related tasks docker: - docker-r + - docker-r-conda - docker-rust - docker-cpp - docker-cpp-alpine - docker-cpp-cmake32 - docker-cpp-release + - docker-cpp-fuzzit - docker-c_glib - docker-go - docker-python-2.7 @@ -87,6 +89,16 @@ tasks: - docker-compose build r - docker-compose run r + docker-r-conda: + ci: circle + platform: linux + template: docker-tests/circle.linux.yml + params: + commands: + - docker-compose build cpp + - docker-compose build r-conda + - docker-compose run r-conda + docker-rust: ci: circle platform: linux @@ -132,6 +144,16 @@ tasks: - docker-compose build cpp-cmake32 - docker-compose run cpp-cmake32 + docker-cpp-fuzzit: + ci: circle + platform: linux + template: docker-tests/circle.linux.yml + params: + commands: + - docker-compose build cpp + - docker-compose build fuzzit + - docker-compose run fuzzit + docker-c_glib: ci: circle platform: linux diff --git a/docker-compose.yml b/docker-compose.yml index 2fa5ab47438..507fd4a3528 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -114,10 +114,31 @@ services: build: context: . 
dockerfile: cpp/Dockerfile - environment: - PARQUET_TEST_DATA: /arrow/cpp/submodules/parquet-testing/data volumes: *ubuntu-volumes + cpp-system-deps: + # Usage: + # docker-compose build cpp-system-deps + # docker-compose run cpp-system-deps + image: ursalab/arrow:cpp-system-deps + shm_size: 2G + cap_add: + # LeakSanitizer and gdb requires ptrace(2) + - SYS_PTRACE + build: + context: . + dockerfile: cpp/Dockerfile.ubuntu-bionic + args: + LLVM_VERSION: 7 + environment: + ARROW_FLIGHT: "ON" + ARROW_USE_ASAN: "ON" + ARROW_USE_UBSAN: "ON" + volumes: + - .:/arrow:delegated + - ubuntu-cache:/build:delegated + - ${HOME}/.ccache:/build/ccache:cached + cpp-release: # Usage: # docker-compose build cpp @@ -292,6 +313,18 @@ services: - .:/arrow:ro # ensures that docker won't contaminate the host directory - maven-cache:/root/.m2:delegated + java-all-jdks: + # Usage: + # docker-compose build java-all-jdks + # docker-compose run java-all-jdks + image: ursalab/arrow-ci-java-all-jdks:latest + build: + context: . + dockerfile: java/Dockerfile.all-jdks + volumes: + - .:/arrow:ro # ensures that docker won't contaminate the host directory + - maven-cache:/root/.m2:delegated + js: image: arrow:js build: @@ -367,6 +400,21 @@ services: dockerfile: r/Dockerfile volumes: *ubuntu-volumes + r-conda: + # Usage: + # export R_VERSION=3.5.1 + # docker-compose build cpp + # docker-compose build r-conda + # docker-compose run r-conda + image: arrow:r-conda-${R_VERSION:-3.5.1} + shm_size: 2G + build: + context: . + dockerfile: r/Dockerfile.conda + args: + R_VERSION: ${R_VERSION:-3.5.1} + volumes: *ubuntu-volumes + ######################### Tools and Linters ################################# # TODO(kszucs): site @@ -386,6 +434,17 @@ services: command: arrow/dev/lint/run_linters.sh volumes: *ubuntu-volumes + fuzzit: + # Usage: + # docker-compose build cpp + # docker-compose build fuzzit + # docker-compose run fuzzit + image: arrow:fuzzit + build: + context: . + dockerfile: dev/fuzzit/Dockerfile + volumes: *ubuntu-volumes + iwyu: # Usage: # export PYTHON_VERSION=3.6 @@ -439,7 +498,7 @@ services: # $ docker-compose pull python-manylinux1 # an then run: # $ docker-compose run -e PYTHON_VERSION=3.7 python-manylinux1 - image: quay.io/ursa-labs/arrow_manylinux1_x86_64_base:latest + image: ursalab/arrow_manylinux1_x86_64_base:0.14.1-static-zlib build: context: python/manylinux1 dockerfile: Dockerfile-x86_64_base @@ -461,7 +520,7 @@ services: # $ docker-compose pull python-manylinux2010 # an then run: # $ docker-compose run -e PYTHON_VERSION=3.7 python-manylinux2010 - image: quay.io/ursa-labs/arrow_manylinux2010_x86_64_base:latest + image: ursalab/arrow_manylinux2010_x86_64_base:0.14.1-static-zlib build: context: python/manylinux2010 dockerfile: Dockerfile-x86_64_base @@ -597,6 +656,16 @@ services: # TODO(kszucs): hive-integration + ################################# Release ################################### + + release-source: + image: arrow:release-source + build: + context: . + dockerfile: dev/release/source/Dockerfile + volumes: + - .:/arrow:delegated + ######################## Verification Containers ############################ debian-stretch: diff --git a/docs/source/cpp/api/table.rst b/docs/source/cpp/api/table.rst index e8b4f8e066e..53e2d72e672 100644 --- a/docs/source/cpp/api/table.rst +++ b/docs/source/cpp/api/table.rst @@ -19,23 +19,6 @@ Two-dimensional Datasets ======================== -Columns -======= - -.. doxygenclass:: arrow::Column - :project: arrow_cpp - :members: - -Tables -====== - -.. 
doxygenclass:: arrow::Table - :project: arrow_cpp - :members: - -.. doxygenfunction:: arrow::ConcatenateTables - :project: arrow_cpp - Record Batches ============== @@ -50,3 +33,13 @@ Record Batches .. doxygenclass:: arrow::TableBatchReader :project: arrow_cpp :members: + +Tables +====== + +.. doxygenclass:: arrow::Table + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::ConcatenateTables + :project: arrow_cpp diff --git a/docs/source/cpp/overview.rst b/docs/source/cpp/overview.rst index 490efc1b7a2..53fc998eae6 100644 --- a/docs/source/cpp/overview.rst +++ b/docs/source/cpp/overview.rst @@ -51,10 +51,8 @@ The two-dimensional layer **Schemas** describe a logical collection of several pieces of data, each with a distinct name and type, and optional metadata. -**Columns** are like chunked arrays, but with optional metadata. - -**Tables** are collections of columns in accordance to a schema. They are -the most capable dataset-providing abstraction in Arrow. +**Tables** are collections of chunked array in accordance to a schema. They +are the most capable dataset-providing abstraction in Arrow. **Record batches** are collections of contiguous arrays, described by a schema. They allow incremental construction or serialization of tables. diff --git a/docs/source/cpp/tables.rst b/docs/source/cpp/tables.rst index d42f0c6c4f5..e929c6eecd8 100644 --- a/docs/source/cpp/tables.rst +++ b/docs/source/cpp/tables.rst @@ -56,20 +56,13 @@ function overloads:: field_b = arrow::field("B", arrow::utf8()); schema = arrow::schema({field_a, field_b}); -Columns -======= - -A :class:`arrow::Column` is a chunked array tied together with a field. -The field describes the column's name (for lookup in a larger dataset) -and its metadata. - Tables ====== -A :class:`arrow::Table` is a two-dimensional dataset of a number of columns, -together with a schema. The columns' names and types must match the schema. -Also, each column must have the same logical length in number of elements -(although each column can be chunked in a different way). +A :class:`arrow::Table` is a two-dimensional dataset with chunked arrays for +columns, together with a schema providing field names. Also, each chunked +column must have the same logical length in number of elements (although each +column can be chunked in a different way). 
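(With arrow::Column removed, a table column is simply a chunked array, as the Python documentation changes below also reflect. A minimal pyarrow sketch of the resulting access pattern, mirroring the updated data.rst examples; exact version availability is assumed.)

import pyarrow as pa

# Building a table from two identical batches yields a two-chunk column.
batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], ["f0"])
table = pa.Table.from_batches([batch, batch])

col = table[0]                 # a pyarrow.ChunkedArray, no Column wrapper
assert col.num_chunks == 2
assert col.chunk(0).equals(pa.array([1, 2, 3]))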
Record Batches ============== diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index 4691d2e0f0c..f25e030fd0e 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -94,6 +94,7 @@ The test groups currently include: * ``plasma``: Plasma Object Store tests * ``s3``: Tests for Amazon S3 * ``tensorflow``: Tests that involve TensorFlow +* ``flight``: Flight RPC tests Benchmarking ------------ diff --git a/docs/source/python/api/tables.rst b/docs/source/python/api/tables.rst index 9d350a402ed..501230f8fa6 100644 --- a/docs/source/python/api/tables.rst +++ b/docs/source/python/api/tables.rst @@ -29,7 +29,6 @@ Factory Functions :toctree: ../generated/ table - column chunked_array concat_tables @@ -40,7 +39,6 @@ Classes :toctree: ../generated/ ChunkedArray - Column RecordBatch Table diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 3260f6d6377..cb1eb6b1a7d 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -394,16 +394,15 @@ one or more copies of the batch using ``Table.from_batches``: table table.num_rows -The table's columns are instances of :class:`~.Column`, which is a container -for one or more arrays of the same type. +The table's columns are instances of :class:`~.ChunkedArray`, which is a +container for one or more arrays of the same type. .. ipython:: python c = table[0] c - c.data - c.data.num_chunks - c.data.chunk(0) + c.num_chunks + c.chunk(0) As you'll see in the :ref:`pandas section `, we can convert these objects to contiguous NumPy arrays for use in pandas: @@ -421,7 +420,7 @@ Multiple tables can also be concatenated together to form a single table using table_all = pa.concat_tables(tables) table_all.num_rows c = table_all[0] - c.data.num_chunks + c.num_chunks This is similar to ``Table.from_batches``, but uses tables as input instead of record batches. Record batches can be made into tables, but not the other way diff --git a/docs/source/python/extending.rst b/docs/source/python/extending.rst index 6b5c9ce1902..4ee20c77aee 100644 --- a/docs/source/python/extending.rst +++ b/docs/source/python/extending.rst @@ -81,11 +81,6 @@ C++ objects. Return whether *obj* wraps an Arrow C++ :class:`Buffer` pointer; in other words, whether *obj* is a :py:class:`pyarrow.Buffer` instance. -.. function:: bool is_column(PyObject* obj) - - Return whether *obj* wraps an Arrow C++ :class:`Column` pointer; - in other words, whether *obj* is a :py:class:`pyarrow.Column` instance. - .. function:: bool is_data_type(PyObject* obj) Return whether *obj* wraps an Arrow C++ :class:`DataType` pointer; @@ -116,6 +111,16 @@ C++ objects. Return whether *obj* wraps an Arrow C++ :class:`Tensor` pointer; in other words, whether *obj* is a :py:class:`pyarrow.Tensor` instance. +.. function:: bool is_sparse_tensor_coo(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`SparseTensorCOO` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.SparseTensorCOO` instance. + +.. function:: bool is_sparse_tensor_csr(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`SparseTensorCSR` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.SparseTensorCSR` instance. + The following functions expect a pyarrow object, unwrap the underlying Arrow C++ API pointer, and put it in the *out* parameter. The returned :class:`Status` object must be inspected first to know whether any error @@ -129,10 +134,6 @@ occurred. If successful, *out* is guaranteed to be non-NULL. 
Unwrap the Arrow C++ :class:`Buffer` pointer from *obj* and put it in *out*. -.. function:: Status unwrap_column(PyObject* obj, std::shared_ptr* out) - - Unwrap the Arrow C++ :class:`Column` pointer from *obj* and put it in *out*. - .. function:: Status unwrap_data_type(PyObject* obj, std::shared_ptr* out) Unwrap the Arrow C++ :class:`DataType` pointer from *obj* and put it in *out*. @@ -157,6 +158,14 @@ occurred. If successful, *out* is guaranteed to be non-NULL. Unwrap the Arrow C++ :class:`Tensor` pointer from *obj* and put it in *out*. +.. function:: Status unwrap_sparse_tensor_coo(PyObject* obj, std::shared_ptr* out) + + Unwrap the Arrow C++ :class:`SparseTensorCOO` pointer from *obj* and put it in *out*. + +.. function:: Status unwrap_sparse_tensor_csr(PyObject* obj, std::shared_ptr* out) + + Unwrap the Arrow C++ :class:`SparseTensorCSR` pointer from *obj* and put it in *out*. + The following functions take an Arrow C++ API pointer and wrap it in a pyarray object of the corresponding type. A new reference is returned. On error, NULL is returned and a Python exception is set. @@ -169,10 +178,6 @@ On error, NULL is returned and a Python exception is set. Wrap the Arrow C++ *buffer* in a :py:class:`pyarrow.Buffer` instance. -.. function:: PyObject* wrap_column(const std::shared_ptr& column) - - Wrap the Arrow C++ *column* in a :py:class:`pyarrow.Column` instance. - .. function:: PyObject* wrap_data_type(const std::shared_ptr& data_type) Wrap the Arrow C++ *data_type* in a :py:class:`pyarrow.DataType` instance. @@ -197,6 +202,14 @@ On error, NULL is returned and a Python exception is set. Wrap the Arrow C++ *tensor* in a :py:class:`pyarrow.Tensor` instance. +.. function:: PyObject* wrap_sparse_tensor_coo(const std::shared_ptr& sparse_tensor) + + Wrap the Arrow C++ *COO sparse tensor* in a :py:class:`pyarrow.SparseTensorCOO` instance. + +.. function:: PyObject* wrap_sparse_tensor_csr(const std::shared_ptr& sparse_tensor) + + Wrap the Arrow C++ *CSR sparse tensor* in a :py:class:`pyarrow.SparseTensorCSR` instance. + Cython API ---------- @@ -233,10 +246,6 @@ an exception) if the input is not of the right type. Unwrap the Arrow C++ :cpp:class:`Buffer` pointer from *obj*. -.. function:: pyarrow_unwrap_column(obj) -> shared_ptr[CColumn] - - Unwrap the Arrow C++ :cpp:class:`Column` pointer from *obj*. - .. function:: pyarrow_unwrap_data_type(obj) -> shared_ptr[CDataType] Unwrap the Arrow C++ :cpp:class:`CDataType` pointer from *obj*. @@ -257,6 +266,14 @@ an exception) if the input is not of the right type. Unwrap the Arrow C++ :cpp:class:`Tensor` pointer from *obj*. +.. function:: pyarrow_unwrap_sparse_tensor_coo(obj) -> shared_ptr[CSparseTensorCOO] + + Unwrap the Arrow C++ :cpp:class:`SparseTensorCOO` pointer from *obj*. + +.. function:: pyarrow_unwrap_sparse_tensor_csr(obj) -> shared_ptr[CSparseTensorCSR] + + Unwrap the Arrow C++ :cpp:class:`SparseTensorCSR` pointer from *obj*. + The following functions take a Arrow C++ API pointer and wrap it in a pyarray object of the corresponding type. An exception is raised on error. @@ -272,10 +289,6 @@ pyarray object of the corresponding type. An exception is raised on error. Wrap the Arrow C++ *buffer* in a Python :class:`pyarrow.Buffer` instance. -.. function:: pyarrow_wrap_column(sp_array: const shared_ptr[CColumn]& column) -> object - - Wrap the Arrow C++ *column* in a Python :class:`pyarrow.Column` instance. - .. 
function:: pyarrow_wrap_data_type(sp_array: const shared_ptr[CDataType]& data_type) -> object Wrap the Arrow C++ *data_type* in a Python :class:`pyarrow.DataType` instance. @@ -300,6 +313,14 @@ pyarray object of the corresponding type. An exception is raised on error. Wrap the Arrow C++ *tensor* in a Python :class:`pyarrow.Tensor` instance. +.. function:: pyarrow_wrap_sparse_tensor_coo(sp_array: const shared_ptr[CSparseTensorCOO]& sparse_tensor) -> object + + Wrap the Arrow C++ *COO sparse tensor* in a Python :class:`pyarrow.SparseTensorCOO` instance. + +.. function:: pyarrow_wrap_sparse_tensor_csr(sp_array: const shared_ptr[CSparseTensorCSR]& sparse_tensor) -> object + + Wrap the Arrow C++ *CSR sparse tensor* in a Python :class:`pyarrow.SparseTensorCSR` instance. + Example ~~~~~~~ diff --git a/format/Schema.fbs b/format/Schema.fbs index 36127925eff..91aa9db487f 100644 --- a/format/Schema.fbs +++ b/format/Schema.fbs @@ -103,13 +103,22 @@ table FloatingPoint { precision: Precision; } -/// Unicode with UTF-8 encoding +/// UTF-8 encoded Unicode strings. Items are limited to 32-bit byte lengths. table Utf8 { } +/// Raw binary strings. Items are limited to 32-bit byte lengths. table Binary { } +/// Variants of Utf8 and Binary with 64-bit byte lengths. +/// These types are optional and may not be supported by all implementations. +table LargeUtf8 { +} + +table LargeBinary { +} + table FixedSizeBinary { /// Number of bytes per value byteWidth: int; @@ -235,6 +244,8 @@ union Type { FixedSizeList, Map, Duration, + LargeBinary, + LargeUtf8, } /// ---------------------------------------------------------------------- diff --git a/integration/turbodbc/runtest.sh b/integration/turbodbc/runtest.sh index 31f924336fe..874cba05434 100755 --- a/integration/turbodbc/runtest.sh +++ b/integration/turbodbc/runtest.sh @@ -25,9 +25,8 @@ python -c "import pyarrow.orc" python -c "import pyarrow.parquet" pushd /tmp -git clone https://github.com/xhochy/turbodbc.git +git clone https://github.com/blue-yonder/turbodbc.git pushd turbodbc -git checkout arrow-0.13.0-prep git submodule update --init --recursive service postgresql start diff --git a/java/Dockerfile.all-jdks b/java/Dockerfile.all-jdks new file mode 100644 index 00000000000..bf4e2afa227 --- /dev/null +++ b/java/Dockerfile.all-jdks @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +FROM ubuntu:18.04 + +# install build essentials +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + wget \ + software-properties-common \ + ca-certificates \ + maven \ + rsync \ + tzdata \ + openjdk-8-jdk \ + openjdk-11-jdk && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Test all supported JDKs +CMD ["arrow/ci/docker_java_test_all.sh"] diff --git a/java/README.md b/java/README.md index b19bfdafecf..23575bfd1a3 100644 --- a/java/README.md +++ b/java/README.md @@ -28,6 +28,7 @@ install: ## Building and running tests ``` +git submodule update --init --recursive # Needed for flight cd java mvn install ``` diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml new file mode 100644 index 00000000000..8ef7548d38e --- /dev/null +++ b/java/adapter/avro/pom.xml @@ -0,0 +1,52 @@ + + + + + 4.0.0 + + + org.apache.arrow + arrow-java-root + 1.0.0-SNAPSHOT + ../../pom.xml + + + arrow-avro + Arrow AVRO Adapter + http://maven.apache.org + + + + + + org.apache.arrow + arrow-memory + ${project.version} + + + + + org.apache.arrow + arrow-vector + ${project.version} + + + + org.apache.avro + avro + 1.9.0 + + + + diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java b/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java new file mode 100644 index 00000000000..4801d690125 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import java.io.IOException; + +import org.apache.arrow.memory.BaseAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.io.Decoder; + +/** + * Utility class to convert Avro objects to columnar Arrow format objects. + */ +public class AvroToArrow { + + /** + * Fetch the data from {@link GenericDatumReader} and convert it to Arrow objects. + * @param schema avro schema. + * @param allocator Memory allocator to use. 
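+ * @param decoder avro decoder to read data.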
+ * @return Arrow Data Objects {@link VectorSchemaRoot} + */ + public static VectorSchemaRoot avroToArrow(Schema schema, Decoder decoder, BaseAllocator allocator) + throws IOException { + Preconditions.checkNotNull(schema, "Avro schema object can not be null"); + + VectorSchemaRoot root = VectorSchemaRoot.create( + AvroToArrowUtils.avroToArrowSchema(schema), allocator); + AvroToArrowUtils.avroToArrowVectors(decoder, root); + return root; + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java b/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java new file mode 100644 index 00000000000..c142689ddda --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; + +import java.io.EOFException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.arrow.consumers.AvroBooleanConsumer; +import org.apache.arrow.consumers.AvroBytesConsumer; +import org.apache.arrow.consumers.AvroDoubleConsumer; +import org.apache.arrow.consumers.AvroFloatConsumer; +import org.apache.arrow.consumers.AvroIntConsumer; +import org.apache.arrow.consumers.AvroLongConsumer; +import org.apache.arrow.consumers.AvroStringConsumer; +import org.apache.arrow.consumers.Consumer; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Type; +import org.apache.avro.io.Decoder; + +/** + * Class that does most of the work to convert Avro data into Arrow columnar format Vector objects. + */ +public class AvroToArrowUtils { + + private static final int DEFAULT_BUFFER_SIZE = 256; + + /** + * Creates an {@link org.apache.arrow.vector.types.pojo.ArrowType} from the {@link Schema.Field} + * +

+   * <p>This method currently performs the following type mapping from Avro data types to the
+   * corresponding Arrow data types:
+   *
+   * <ul>
+   *   <li>STRING --> ArrowType.Utf8</li>
+   *   <li>INT --> ArrowType.Int(32, signed)</li>
+   *   <li>LONG --> ArrowType.Int(64, signed)</li>
+   *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
+   *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
+   *   <li>BOOLEAN --> ArrowType.Bool</li>
+   *   <li>BYTES --> ArrowType.Binary</li>
+   * </ul>
+ */ + private static ArrowType getArrowType(Type type) { + + Preconditions.checkNotNull(type, "Avro type object can't be null"); + + switch (type) { + case STRING: + return new ArrowType.Utf8(); + case INT: + return new ArrowType.Int(32, /*signed=*/true); + case BOOLEAN: + return new ArrowType.Bool(); + case LONG: + return new ArrowType.Int(64, /*signed=*/true); + case FLOAT: + return new ArrowType.FloatingPoint(SINGLE); + case DOUBLE: + return new ArrowType.FloatingPoint(DOUBLE); + case BYTES: + return new ArrowType.Binary(); + default: + // no-op, shouldn't get here + throw new RuntimeException("Can't convert avro type %s to arrow type." + type.getName()); + } + } + + /** + * Create Arrow {@link org.apache.arrow.vector.types.pojo.Schema} object for the given Avro {@link Schema}. + */ + public static org.apache.arrow.vector.types.pojo.Schema avroToArrowSchema(Schema schema) { + + Preconditions.checkNotNull(schema, "Avro Schema object can't be null"); + List arrowFields = new ArrayList<>(); + + Schema.Type type = schema.getType(); + final Map metadata = new HashMap<>(); + schema.getObjectProps().forEach((k,v) -> metadata.put(k, v.toString())); + + if (type == Type.RECORD) { + throw new UnsupportedOperationException(); + } else if (type == Type.MAP) { + throw new UnsupportedOperationException(); + } else if (type == Type.UNION) { + throw new UnsupportedOperationException(); + } else if (type == Type.ARRAY) { + throw new UnsupportedOperationException(); + } else if (type == Type.ENUM) { + throw new UnsupportedOperationException(); + } else if (type == Type.NULL) { + throw new UnsupportedOperationException(); + } else { + final FieldType fieldType = new FieldType(true, getArrowType(type), null, null); + arrowFields.add(new Field("", fieldType, null)); + } + + return new org.apache.arrow.vector.types.pojo.Schema(arrowFields, /*metadata=*/ metadata); + } + + /** + * Create consumers to consume avro values from decoder, will reduce boxing/unboxing operations. + */ + public static Consumer[] createAvroConsumers(VectorSchemaRoot root) { + + Consumer[] consumers = new Consumer[root.getFieldVectors().size()]; + for (int i = 0; i < root.getFieldVectors().size(); i++) { + FieldVector vector = root.getFieldVectors().get(i); + Consumer consumer; + switch (vector.getMinorType()) { + case INT: + consumer = new AvroIntConsumer((IntVector) vector); + break; + case VARBINARY: + consumer = new AvroBytesConsumer((VarBinaryVector) vector); + break; + case VARCHAR: + consumer = new AvroStringConsumer((VarCharVector) vector); + break; + case BIGINT: + consumer = new AvroLongConsumer((BigIntVector) vector); + break; + case FLOAT4: + consumer = new AvroFloatConsumer((Float4Vector) vector); + break; + case FLOAT8: + consumer = new AvroDoubleConsumer((Float8Vector) vector); + break; + case BIT: + consumer = new AvroBooleanConsumer((BitVector) vector); + break; + default: + throw new RuntimeException("could not get consumer from type:" + vector.getMinorType()); + } + consumers[i] = consumer; + } + return consumers; + } + + /** + * Iterate the given Avro {@link Decoder} object to fetch the data and transpose it to populate + * the given Arrow Vector objects. + * @param decoder avro decoder to read data. 
+ * @param root Arrow {@link VectorSchemaRoot} object to populate + */ + public static void avroToArrowVectors(Decoder decoder, VectorSchemaRoot root) throws IOException { + + Preconditions.checkNotNull(decoder, "Avro decoder object can't be null"); + Preconditions.checkNotNull(root, "VectorSchemaRoot object can't be null"); + + allocateVectors(root, DEFAULT_BUFFER_SIZE); + Consumer[] consumers = createAvroConsumers(root); + while (true) { + try { + for (Consumer consumer : consumers) { + consumer.consume(decoder); + } + //reach end will throw EOFException. + } catch (EOFException eofException) { + break; + } + } + } + + private static void allocateVectors(VectorSchemaRoot root, int size) { + List vectors = root.getFieldVectors(); + for (FieldVector fieldVector : vectors) { + if (fieldVector instanceof BaseFixedWidthVector) { + ((BaseFixedWidthVector) fieldVector).allocateNew(size); + } else { + fieldVector.allocateNew(); + } + fieldVector.setInitialCapacity(size); + } + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java new file mode 100644 index 00000000000..7bbfac1a230 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.complex.impl.BitWriterImpl; +import org.apache.arrow.vector.complex.writer.BitWriter; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume boolean type values from avro decoder. + * Write the data to {@link BitVector}. + */ +public class AvroBooleanConsumer implements Consumer { + + private final BitWriter writer; + + public AvroBooleanConsumer(BitVector vector) { + this.writer = new BitWriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + writer.writeBit(decoder.readBoolean() ? 1 : 0); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java new file mode 100644 index 00000000000..9c3eff70d73 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; +import org.apache.arrow.vector.complex.writer.VarBinaryWriter; +import org.apache.arrow.vector.holders.VarBinaryHolder; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume bytes type values from avro decoder. + * Write the data to {@link VarBinaryVector}. + */ +public class AvroBytesConsumer implements Consumer { + + private final VarBinaryWriter writer; + private final VarBinaryVector vector; + + public AvroBytesConsumer(VarBinaryVector vector) { + this.vector = vector; + this.writer = new VarBinaryWriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + VarBinaryHolder holder = new VarBinaryHolder(); + ByteBuffer byteBuffer = decoder.readBytes(null); + + holder.start = 0; + holder.end = byteBuffer.capacity(); + holder.buffer = vector.getAllocator().buffer(byteBuffer.capacity()); + holder.buffer.setBytes(0, byteBuffer); + + writer.write(holder); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java new file mode 100644 index 00000000000..62dc315084f --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.complex.impl.Float8WriterImpl; +import org.apache.arrow.vector.complex.writer.Float8Writer; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume double type values from avro decoder. + * Write the data to {@link Float8Vector}. 
+ */ +public class AvroDoubleConsumer implements Consumer { + + private final Float8Writer writer; + + public AvroDoubleConsumer(Float8Vector vector) { + this.writer = new Float8WriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + writer.writeFloat8(decoder.readDouble()); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java new file mode 100644 index 00000000000..2bec2b2d090 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.complex.impl.Float4WriterImpl; +import org.apache.arrow.vector.complex.writer.Float4Writer; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume float type values from avro decoder. + * Write the data to {@link Float4Vector}. + */ +public class AvroFloatConsumer implements Consumer { + + private final Float4Writer writer; + + public AvroFloatConsumer(Float4Vector vector) { + this.writer = new Float4WriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + writer.writeFloat4(decoder.readFloat()); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java new file mode 100644 index 00000000000..60285f06af4 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.complex.impl.IntWriterImpl; +import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume int type values from avro decoder. + * Write the data to {@link IntVector}. + */ +public class AvroIntConsumer implements Consumer { + + private final IntWriter writer; + + public AvroIntConsumer(IntVector vector) { + this.writer = new IntWriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + writer.writeInt(decoder.readInt()); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java new file mode 100644 index 00000000000..15756afd69f --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.complex.impl.BigIntWriterImpl; +import org.apache.arrow.vector.complex.writer.BigIntWriter; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume long type values from avro decoder. + * Write the data to {@link BigIntVector}. + */ +public class AvroLongConsumer implements Consumer { + + private final BigIntWriter writer; + + public AvroLongConsumer(BigIntVector vector) { + this.writer = new BigIntWriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + writer.writeBigInt(decoder.readLong()); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java new file mode 100644 index 00000000000..db438f96e91 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; +import org.apache.arrow.vector.complex.writer.VarCharWriter; +import org.apache.arrow.vector.holders.VarCharHolder; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume string type values from avro decoder. + * Write the data to {@link VarCharVector}. + */ +public class AvroStringConsumer implements Consumer { + + private final VarCharVector vector; + private final VarCharWriter writer; + + public AvroStringConsumer(VarCharVector vector) { + this.vector = vector; + this.writer = new VarCharWriterImpl(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + VarCharHolder holder = new VarCharHolder(); + ByteBuffer byteBuffer = decoder.readBytes(null); + + holder.start = 0; + holder.end = byteBuffer.capacity(); + holder.buffer = vector.getAllocator().buffer(byteBuffer.capacity()); + holder.buffer.setBytes(0, byteBuffer); + + writer.write(holder); + writer.setPosition(writer.getPosition() + 1); + } +} diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java new file mode 100644 index 00000000000..b3c52818491 --- /dev/null +++ b/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.avro.io.Decoder; + +/** + * An abstraction that is used to consume values from avro decoder. + */ +public interface Consumer { + + void consume(Decoder decoder) throws IOException; +} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java new file mode 100644 index 00000000000..d880639acc4 --- /dev/null +++ b/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import org.apache.arrow.memory.BaseAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class AvroToArrowTest { + + @ClassRule + public static final TemporaryFolder TMP = new TemporaryFolder(); + + private BaseAllocator allocator; + + @Before + public void init() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + private Schema getSchema(String schemaName) throws Exception { + Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), + "schema", schemaName); + return new Schema.Parser().parse(schemaPath.toFile()); + } + + private VectorSchemaRoot writeAndReadPrimitive(Schema schema, List data) throws Exception { + File dataFile = TMP.newFile(); + + BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + DatumWriter writer = new GenericDatumWriter(schema); + BinaryDecoder decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + + for (Object value : data) { + writer.write(value, encoder); + } + + return AvroToArrow.avroToArrow(schema, decoder, allocator); + } + + @Test + public void testStringType() throws Exception { + Schema schema = getSchema("test_primitive_string.avsc"); + ArrayList data = new ArrayList(Arrays.asList("v1", "v2", "v3", "v4", "v5")); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + @Test + public void testIntType() throws Exception { + Schema schema = getSchema("test_primitive_int.avsc"); + ArrayList data = new ArrayList(Arrays.asList(1, 2, 3, 4, 5)); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + @Test + public void testLongType() throws Exception { + Schema schema = getSchema("test_primitive_long.avsc"); + ArrayList data = new ArrayList(Arrays.asList(1L, 2L, 3L, 4L, 
5L)); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + @Test + public void testFloatType() throws Exception { + Schema schema = getSchema("test_primitive_float.avsc"); + ArrayList data = new ArrayList(Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f)); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + @Test + public void testDoubleType() throws Exception { + Schema schema = getSchema("test_primitive_double.avsc"); + ArrayList data = new ArrayList(Arrays.asList(1.1, 2.2, 3.3, 4.4, 5.5)); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + @Test + public void testBytesType() throws Exception { + Schema schema = getSchema("test_primitive_bytes.avsc"); + ArrayList data = new ArrayList(Arrays.asList( + ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8)))); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + @Test + public void testBooleanType() throws Exception { + Schema schema = getSchema("test_primitive_boolean.avsc"); + ArrayList data = new ArrayList(Arrays.asList(true, false, true, false, true)); + + VectorSchemaRoot root = writeAndReadPrimitive(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(schema, data, vector); + } + + private void checkPrimitiveResult(Schema schema, ArrayList data, FieldVector vector) { + assertEquals(data.size(), vector.getValueCount()); + for (int i = 0; i < data.size(); i++) { + Object value1 = data.get(i); + Object value2 = vector.getObject(i); + if (schema.getType() == Schema.Type.BYTES) { + value2 = ByteBuffer.wrap((byte[]) value2); + } else if (schema.getType() == Schema.Type.STRING) { + value2 = value2.toString(); + } + assertTrue(Objects.equals(value1, value2)); + } + } +} diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java b/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java new file mode 100644 index 00000000000..aa5c45a52e2 --- /dev/null +++ b/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; + +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + + +public class TestWriteReadAvroRecord { + + @ClassRule + public static final TemporaryFolder TMP = new TemporaryFolder(); + + @Test + public void testWriteAndRead() throws Exception { + + File dataFile = TMP.newFile(); + Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), "schema", "test.avsc"); + Schema schema = new Schema.Parser().parse(schemaPath.toFile()); + + //write data to disk + GenericRecord user1 = new GenericData.Record(schema); + user1.put("name", "Alyssa"); + user1.put("favorite_number", 256); + + GenericRecord user2 = new GenericData.Record(schema); + user2.put("name", "Ben"); + user2.put("favorite_number", 7); + user2.put("favorite_color", "red"); + + DatumWriter datumWriter = new GenericDatumWriter(schema); + DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); + dataFileWriter.create(schema, dataFile); + dataFileWriter.append(user1); + dataFileWriter.append(user2); + dataFileWriter.close(); + + //read data from disk + DatumReader datumReader = new GenericDatumReader(schema); + DataFileReader + dataFileReader = new DataFileReader(dataFile, datumReader); + List result = new ArrayList<>(); + while (dataFileReader.hasNext()) { + GenericRecord user = dataFileReader.next(); + result.add(user); + } + + assertEquals(2, result.size()); + GenericRecord deUser1 = result.get(0); + assertEquals("Alyssa", deUser1.get("name").toString()); + assertEquals(256, deUser1.get("favorite_number")); + assertEquals(null, deUser1.get("favorite_color")); + + GenericRecord deUser2 = result.get(1); + assertEquals("Ben", deUser2.get("name").toString()); + assertEquals(7, deUser2.get("favorite_number")); + assertEquals("red", deUser2.get("favorite_color").toString()); + } + +} diff --git a/java/adapter/avro/src/test/resources/schema/test.avsc b/java/adapter/avro/src/test/resources/schema/test.avsc new file mode 100644 index 00000000000..92c0873de1d --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc new file mode 100644 index 00000000000..7652ce72385 --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "boolean", + "name": "TestBoolean" +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc new file mode 100644 index 00000000000..5102430b65a --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "bytes", + "name": "TestBytes" +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc new file mode 100644 index 00000000000..d1ae0b605a9 --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "double", + "name": "TestDouble" +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc new file mode 100644 index 00000000000..675d1090d86 --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "float", + "name": "TestFloat" +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc new file mode 100644 index 00000000000..8fc8488281a --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "int", + "name": "TestInt" +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc new file mode 100644 index 00000000000..b9706107c09 --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "long", + "name": "TestLong" +} diff --git a/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc b/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc new file mode 100644 index 00000000000..b4a89a7f62c --- /dev/null +++ b/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "string", + "name": "TestString" +} diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index c4c9574180f..b8cf0414bfc 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -16,7 +16,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT ../../pom.xml diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index d5be486c843..badc5d2aa7f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -60,8 +60,6 @@ * CLOB --> ArrowType.Utf8 * BLOB --> ArrowType.Binary * - *

TODO: At this time, SQL Data type java.sql.Types.ARRAY is still not supported. - * * @since 0.10.0 */ public class JdbcToArrow { @@ -83,8 +81,6 @@ public class JdbcToArrow { */ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BaseAllocator allocator) throws SQLException, IOException { - Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); - Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); JdbcToArrowConfig config = @@ -111,8 +107,6 @@ public static VectorSchemaRoot sqlToArrow( BaseAllocator allocator, Calendar calendar) throws SQLException, IOException { - Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); - Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); Preconditions.checkNotNull(calendar, "Calendar object can not be null"); @@ -135,7 +129,6 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J throws SQLException, IOException { Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); - Preconditions.checkNotNull(config, "The configuration cannot be null"); try (Statement stmt = connection.createStatement()) { return sqlToArrow(stmt.executeQuery(query), config); @@ -166,7 +159,6 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLExcepti */ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator allocator) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); JdbcToArrowConfig config = @@ -201,7 +193,6 @@ public static VectorSchemaRoot sqlToArrow( BaseAllocator allocator, Calendar calendar) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar)); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index 068fd07c7ed..800db1b00d1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -28,7 +28,8 @@ * This class configures the JDBC-to-Arrow conversion process. *

* The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, - * and the calendar is used to define the time zone of any {@link org.apahe.arrow.vector.pojo.ArrowType.Timestamp} + * and the calendar is used to define the time zone of any + * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} * fields that are created during the conversion. Neither field may be null. *
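+ *
+ * <p>A construction sketch (hedged: {@code allocator} stands in for an existing BaseAllocator,
+ * and the UTC calendar is only illustrative):
+ * <pre>{@code
+ * JdbcToArrowConfig config =
+ *     new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC")));
+ * }</pre>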

*

@@ -98,8 +99,8 @@ public final class JdbcToArrowConfig { /** * The calendar to use when defining Arrow Timestamp fields - * and retrieving {@link Date}, {@link Time}, or {@link Timestamp} - * data types from the {@link ResultSet}, or null if not converting. + * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp} + * data types from the {@link java.sql.ResultSet}, or null if not converting. * * @return the calendar. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index f54363f5bef..1fbad896528 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -128,7 +128,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar * {@link org.apache.arrow.vector.types.pojo.ArrowType} for each field in the {@link java.sql.ResultSetMetaData}. *

*

- * If {@link JdbcToArrowConfig#getIncludeMetadata()} returns true, the following fields
+ * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the following fields
 * will be added to the {@link FieldType#getMetadata()}:
 *
 * <ul>
 *   <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}</li>
  • @@ -243,39 +243,28 @@ public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calend timezone = null; } - - final ArrowType arrowType; - switch (fieldInfo.getJdbcType()) { case Types.BOOLEAN: case Types.BIT: - arrowType = new ArrowType.Bool(); - break; + return new ArrowType.Bool(); case Types.TINYINT: - arrowType = new ArrowType.Int(8, true); - break; + return new ArrowType.Int(8, true); case Types.SMALLINT: - arrowType = new ArrowType.Int(16, true); - break; + return new ArrowType.Int(16, true); case Types.INTEGER: - arrowType = new ArrowType.Int(32, true); - break; + return new ArrowType.Int(32, true); case Types.BIGINT: - arrowType = new ArrowType.Int(64, true); - break; + return new ArrowType.Int(64, true); case Types.NUMERIC: case Types.DECIMAL: int precision = fieldInfo.getPrecision(); int scale = fieldInfo.getScale(); - arrowType = new ArrowType.Decimal(precision, scale); - break; + return new ArrowType.Decimal(precision, scale); case Types.REAL: case Types.FLOAT: - arrowType = new ArrowType.FloatingPoint(SINGLE); - break; + return new ArrowType.FloatingPoint(SINGLE); case Types.DOUBLE: - arrowType = new ArrowType.FloatingPoint(DOUBLE); - break; + return new ArrowType.FloatingPoint(DOUBLE); case Types.CHAR: case Types.NCHAR: case Types.VARCHAR: @@ -283,33 +272,24 @@ public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calend case Types.LONGVARCHAR: case Types.LONGNVARCHAR: case Types.CLOB: - arrowType = new ArrowType.Utf8(); - break; + return new ArrowType.Utf8(); case Types.DATE: - arrowType = new ArrowType.Date(DateUnit.MILLISECOND); - break; + return new ArrowType.Date(DateUnit.MILLISECOND); case Types.TIME: - arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - break; + return new ArrowType.Time(TimeUnit.MILLISECOND, 32); case Types.TIMESTAMP: - arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone); - break; + return new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone); case Types.BINARY: case Types.VARBINARY: case Types.LONGVARBINARY: case Types.BLOB: - arrowType = new ArrowType.Binary(); - break; + return new ArrowType.Binary(); case Types.ARRAY: - arrowType = new ArrowType.List(); - break; + return new ArrowType.List(); default: // no-op, shouldn't get here - arrowType = null; - break; + return null; } - - return arrowType; } /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is. 
@@ -382,7 +362,7 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT throws SQLException, IOException { Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null"); - Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null"); + Preconditions.checkNotNull(root, "VectorSchemaRoot object can't be null"); Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null"); ResultSetMetaData rsmd = rs.getMetaData(); @@ -510,7 +490,6 @@ private static void jdbcToFieldVector( updateVector((VarBinaryVector) vector, rs.getBlob(columnIndex), !rs.wasNull(), rowCount); break; - default: // no-op, shouldn't get here break; diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index 9d60b1d292c..0ee1cdd6657 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -86,7 +86,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT ../../pom.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 5101dae05cb..90ea24a4779 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT arrow-algorithm Arrow Algorithms diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java new file mode 100644 index 00000000000..8bed811e2fe --- /dev/null +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.search; + +import org.apache.arrow.algorithm.sort.VectorValueComparator; +import org.apache.arrow.vector.ValueVector; + +/** + * Search for a particular element in the vector. + */ +public final class VectorSearcher { + + /** + * Search for a particular element from the key vector in the target vector by binary search. + * The target vector must be sorted. + * @param targetVector the vector from which to perform the sort. + * @param comparator the criterion for the sort. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of a matched element if any, and -1 otherwise. 
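+   *
+   * <p>A minimal usage sketch (hedged: {@code sorted} and {@code keys} are illustrative names for
+   * an ascending-sorted IntVector and a vector holding the search key at index 0):
+   * <pre>{@code
+   * VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(sorted);
+   * int pos = VectorSearcher.binarySearch(sorted, comparator, keys, 0); // -1 if no match
+   * }</pre>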
+ */ + public static int binarySearch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + // perform binary search + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = (high + low) / 2; + + if (mid < 0) { + // overflow has occurred, so calculate the mid by converting to long first + mid = (int) (((long) high + (long) low) / 2L); + } + + int cmp = comparator.compare(keyIndex, mid); + if (cmp < 0) { + high = mid - 1; + } else if (cmp > 0) { + low = mid + 1; + } else { + return mid; + } + } + return -1; + } + + /** + * Search for a particular element from the key vector in the target vector by traversing the vector in sequence. + * @param targetVector the vector from which to perform the sort. + * @param comparator the criterion for element equality. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of a matched element if any, and -1 otherwise. + */ + public static int linearSearch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + for (int i = 0; i < targetVector.getValueCount(); i++) { + if (comparator.compare(keyIndex, i) == 0) { + return i; + } + } + return -1; + } + + private VectorSearcher() { + + } +} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index e16b9ecdae6..a2d2f786603 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -17,15 +17,87 @@ package org.apache.arrow.algorithm.sort; +import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH; + +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.holders.NullableVarCharHolder; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.ValueVector; /** * Default comparator implementations for different types of vectors. */ public class DefaultVectorComparators { + /** + * Create the default comparator for the vector. + * @param vector the vector. + * @param the vector type. + * @return the default comparator. 
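+   *
+   * <p>A usage sketch (hedged: {@code intVector} is an illustrative, populated IntVector):
+   * <pre>{@code
+   * VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(intVector);
+   * comparator.attachVectors(intVector, intVector);
+   * int cmp = comparator.compare(0, 1); // negative iff the value at index 0 sorts before index 1
+   * }</pre>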
+ */ + public static VectorValueComparator createDefaultComparator(T vector) { + if (vector instanceof BaseFixedWidthVector) { + if (vector instanceof TinyIntVector) { + return (VectorValueComparator) new ByteComparator(); + } else if (vector instanceof SmallIntVector) { + return (VectorValueComparator) new ShortComparator(); + } else if (vector instanceof IntVector) { + return (VectorValueComparator) new IntComparator(); + } else if (vector instanceof BigIntVector) { + return (VectorValueComparator) new LongComparator(); + } else if (vector instanceof Float4Vector) { + return (VectorValueComparator) new Float4Comparator(); + } else if (vector instanceof Float8Vector) { + return (VectorValueComparator) new Float8Comparator(); + } + } else if (vector instanceof BaseVariableWidthVector) { + return (VectorValueComparator) new VariableWidthComparator(); + } + + throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + } + + /** + * Default comparator for bytes. + * The comparison is based on values, with null comes first. + */ + public static class ByteComparator extends VectorValueComparator { + + public ByteComparator() { + super(Byte.SIZE / 8); + } + + @Override + public int compareNotNull(int index1, int index2) { + byte value1 = vector1.get(index1); + byte value2 = vector2.get(index2); + return value1 - value2; + } + } + + /** + * Default comparator for short integers. + * The comparison is based on values, with null comes first. + */ + public static class ShortComparator extends VectorValueComparator { + + public ShortComparator() { + super(Short.SIZE / 8); + } + + @Override + public int compareNotNull(int index1, int index2) { + short value1 = vector1.get(index1); + short value2 = vector2.get(index2); + return value1 - value2; + } + } + /** * Default comparator for 32-bit integers. * The comparison is based on int values, with null comes first. @@ -45,26 +117,109 @@ public int compareNotNull(int index1, int index2) { } /** - * Default comparator for varchars. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for long integers. + * The comparison is based on values, with null comes first. */ - public static class VarCharComparator extends VectorValueComparator { + public static class LongComparator extends VectorValueComparator { - private NullableVarCharHolder holder1 = new NullableVarCharHolder(); - private NullableVarCharHolder holder2 = new NullableVarCharHolder(); + public LongComparator() { + super(Long.SIZE / 8); + } @Override public int compareNotNull(int index1, int index2) { - vector1.get(index1, holder1); - vector2.get(index2, holder2); + long value1 = vector1.get(index1); + long value2 = vector2.get(index2); + + return Long.signum(value1 - value2); + } + } + + /** + * Default comparator for float type. + * The comparison is based on values, with null comes first. 
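+   * NaN is treated as greater than any non-NaN value, and two NaN values compare as equal.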
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
new file mode 100644
index 00000000000..a8d2b703425
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+
+/**
+ * Default in-place sorter for fixed-width vectors.
+ * It is based on quick-sort, with average time complexity O(n * log(n)).
+ * @param <V> the vector type.
+ */
+public class FixedWidthInPlaceVectorSorter<V extends BaseFixedWidthVector> implements InPlaceVectorSorter<V> {
+
+  private VectorValueComparator<V> comparator;
+
+  /**
+   * The vector to sort.
+   */
+  private V vec;
+
+  /**
+   * The buffer to hold the pivot.
+   * It always has length 1.
+   */
+  private V pivotBuffer;
+
+  @Override
+  public void sortInPlace(V vec, VectorValueComparator<V> comparator) {
+    try {
+      this.vec = vec;
+      this.comparator = comparator;
+      this.pivotBuffer = (V) vec.getField().createVector(vec.getAllocator());
+      this.pivotBuffer.allocateNew(1);
+
+      comparator.attachVectors(vec, pivotBuffer);
+      quickSort(0, vec.getValueCount() - 1);
+    } finally {
+      // guard against a failure before the pivot buffer was created
+      if (this.pivotBuffer != null) {
+        this.pivotBuffer.close();
+      }
+    }
+  }
+
+  private void quickSort(int low, int high) {
+    if (low < high) {
+      int mid = partition(low, high);
+      quickSort(low, mid - 1);
+      quickSort(mid + 1, high);
+    }
+  }
+
+  private int partition(int low, int high) {
+    // save the pivot element; the slot at "low" becomes a hole to fill
+    pivotBuffer.copyFrom(low, 0, vec);
+
+    while (low < high) {
+      while (low < high && comparator.compare(high, 0) >= 0) {
+        high -= 1;
+      }
+      vec.copyFrom(high, low, vec);
+
+      while (low < high && comparator.compare(low, 0) <= 0) {
+        low += 1;
+      }
+      vec.copyFrom(low, high, vec);
+    }
+
+    vec.copyFrom(0, low, pivotBuffer);
+    return low;
+  }
+}
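(The partition above is the classic Hoare-style "fill the hole" scheme expressed through copyFrom calls. The same control flow on a plain int array, for reference; each array store is annotated with the vector call it mirrors.)

```java
public class PartitionSketch {
  // vec.copyFrom(src, dst, vec) corresponds to a[dst] = a[src];
  // the one-element pivot buffer corresponds to the local "pivot".
  static int partition(int[] a, int low, int high) {
    int pivot = a[low];               // pivotBuffer.copyFrom(low, 0, vec)
    while (low < high) {
      while (low < high && a[high] >= pivot) {
        high--;                       // scan from the right for a smaller element
      }
      a[low] = a[high];               // vec.copyFrom(high, low, vec)
      while (low < high && a[low] <= pivot) {
        low++;                        // scan from the left for a larger element
      }
      a[high] = a[low];               // vec.copyFrom(low, high, vec)
    }
    a[low] = pivot;                   // vec.copyFrom(0, low, pivotBuffer)
    return low;
  }

  public static void main(String[] args) {
    int[] a = {3, 1, 4, 1, 5};
    int p = partition(a, 0, a.length - 1);
    System.out.println("pivot index: " + p); // elements left of p are <= pivot
  }
}
```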
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
new file mode 100644
index 00000000000..19817fe76b8
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Basic interface for sorting a vector in-place.
+ * That is, the sorting is performed by modifying the input vector,
+ * without creating a new sorted vector.
+ *
+ * @param <V> the vector type.
+ */
+public interface InPlaceVectorSorter<V extends ValueVector> {
+
+  /**
+   * Sort a vector in-place.
+   * @param vec the vector to sort.
+   * @param comparator the criterion for the sort.
+   */
+  void sortInPlace(V vec, VectorValueComparator<V> comparator);
+}
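(Putting the interface and the fixed-width implementation together, a minimal usage sketch mirroring the in-place sorter test further down; class and variable names outside the patch are illustrative.)

```java
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;

public class InPlaceSortExample {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(1024 * 1024);
         IntVector vec = new IntVector("v", allocator)) {
      vec.allocateNew(3);
      vec.set(0, 3);
      vec.set(1, 1);
      vec.set(2, 2);
      vec.setValueCount(3);

      FixedWidthInPlaceVectorSorter<IntVector> sorter = new FixedWidthInPlaceVectorSorter<>();
      VectorValueComparator<IntVector> comparator =
          DefaultVectorComparators.createDefaultComparator(vec);
      sorter.sortInPlace(vec, comparator);
      // vec now holds 1, 2, 3; nulls, if any, would sort first
    }
  }
}
```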
+ */ + void sortInPlace(V vec, VectorValueComparator comparator); +} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java new file mode 100644 index 00000000000..02e2b20cc06 --- /dev/null +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.search; + +import static org.junit.Assert.assertEquals; + +import org.apache.arrow.algorithm.sort.DefaultVectorComparators; +import org.apache.arrow.algorithm.sort.VectorValueComparator; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. 
+ */ +public class TestVectorSearcher { + + private final int VECTOR_LENGTH = 100; + + private BufferAllocator allocator; + + @Before + public void prepare() { + allocator = new RootAllocator(1024 * 1024); + } + + @After + public void shutdown() { + allocator.close(); + } + + @Test + public void testBinarySearchInt() { + try (IntVector rawVector = new IntVector("", allocator); + IntVector negVector = new IntVector("", allocator)) { + rawVector.allocateNew(VECTOR_LENGTH); + rawVector.setValueCount(VECTOR_LENGTH); + negVector.allocateNew(1); + negVector.setValueCount(1); + + // prepare data in sorted order + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i == 0) { + rawVector.setNull(i); + } else { + rawVector.set(i, i); + } + } + negVector.set(0, -333); + + // do search + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(rawVector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); + assertEquals(i, result); + } + + // negative case + assertEquals(-1, VectorSearcher.binarySearch(rawVector, comparator, negVector, 0)); + } + } + + @Test + public void testLinearSearchInt() { + try (IntVector rawVector = new IntVector("", allocator); + IntVector negVector = new IntVector("", allocator)) { + rawVector.allocateNew(VECTOR_LENGTH); + rawVector.setValueCount(VECTOR_LENGTH); + negVector.allocateNew(1); + negVector.setValueCount(1); + + // prepare data in sorted order + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i == 0) { + rawVector.setNull(i); + } else { + rawVector.set(i, i); + } + } + negVector.set(0, -333); + + // do search + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(rawVector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); + assertEquals(i, result); + } + + // negative case + assertEquals(-1, VectorSearcher.linearSearch(rawVector, comparator, negVector, 0)); + } + } + + @Test + public void testBinarySearchVarChar() { + try (VarCharVector rawVector = new VarCharVector("", allocator); + VarCharVector negVector = new VarCharVector("", allocator)) { + rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); + rawVector.setValueCount(VECTOR_LENGTH); + negVector.allocateNew(VECTOR_LENGTH, 1); + negVector.setValueCount(1); + + byte[] content = new byte[2]; + + // prepare data in sorted order + for (int i = 0; i < VECTOR_LENGTH; i++) { + if (i == 0) { + rawVector.setNull(i); + } else { + int q = i / 10; + int r = i % 10; + + content[0] = (byte) ('a' + q); + content[1] = (byte) r; + rawVector.set(i, content); + } + } + negVector.set(0, "abcd".getBytes()); + + // do search + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(rawVector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); + assertEquals(i, result); + } + + // negative case + assertEquals(-1, VectorSearcher.binarySearch(rawVector, comparator, negVector, 0)); + } + } + + @Test + public void testLinearSearchVarChar() { + try (VarCharVector rawVector = new VarCharVector("", allocator); + VarCharVector negVector = new VarCharVector("", allocator)) { + rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); + rawVector.setValueCount(VECTOR_LENGTH); + negVector.allocateNew(VECTOR_LENGTH, 1); + negVector.setValueCount(1); + + byte[] content = new byte[2]; + + // prepare data in sorted order + for (int i 
= 0; i < VECTOR_LENGTH; i++) { + if (i == 0) { + rawVector.setNull(i); + } else { + int q = i / 10; + int r = i % 10; + + content[0] = (byte) ('a' + q); + content[1] = (byte) r; + rawVector.set(i, content); + } + } + negVector.set(0, "abcd".getBytes()); + + // do search + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(rawVector); + for (int i = 0; i < VECTOR_LENGTH; i++) { + int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); + assertEquals(i, result); + } + + // negative case + assertEquals(-1, VectorSearcher.linearSearch(rawVector, comparator, negVector, 0)); + } + } +} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java new file mode 100644 index 00000000000..ecbf9faf0b2 --- /dev/null +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.algorithm.sort; + +import static org.junit.Assert.assertTrue; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +/** + * Test cases for {@link FixedWidthInPlaceVectorSorter}. 
+ */ +public class TestFixedWidthInPlaceVectorSorter { + + private BufferAllocator allocator; + + @Before + public void prepare() { + allocator = new RootAllocator(1024 * 1024); + } + + @After + public void shutdown() { + allocator.close(); + } + + @Test + public void testSortInt() { + try (IntVector vec = new IntVector("", allocator)) { + vec.allocateNew(10); + vec.setValueCount(10); + + // fill data to sort + vec.set(0, 10); + vec.set(1, 8); + vec.setNull(2); + vec.set(3, 10); + vec.set(4, 12); + vec.set(5, 17); + vec.setNull(6); + vec.set(7, 23); + vec.set(8, 35); + vec.set(9, 2); + + // sort the vector + FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + + sorter.sortInPlace(vec, comparator); + + // verify results + Assert.assertEquals(10, vec.getValueCount()); + + assertTrue(vec.isNull(0)); + assertTrue(vec.isNull(1)); + Assert.assertEquals(2, vec.get(2)); + Assert.assertEquals(8, vec.get(3)); + Assert.assertEquals(10, vec.get(4)); + Assert.assertEquals(10, vec.get(5)); + Assert.assertEquals(12, vec.get(6)); + Assert.assertEquals(17, vec.get(7)); + Assert.assertEquals(23, vec.get(8)); + Assert.assertEquals(35, vec.get(9)); + } + } +} diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java index 9133ab6b15c..1dfe946017e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java @@ -21,8 +21,13 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TinyIntVector; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -45,6 +50,100 @@ public void shutdown() { allocator.close(); } + @Test + public void testSortByte() { + try (TinyIntVector vec = new TinyIntVector("", allocator)) { + vec.allocateNew(10); + vec.setValueCount(10); + + // fill data to sort + vec.set(0, 10); + vec.set(1, 8); + vec.setNull(2); + vec.set(3, 10); + vec.set(4, 12); + vec.set(5, 17); + vec.setNull(6); + vec.set(7, 23); + vec.set(8, 35); + vec.set(9, 2); + + // sort the vector + FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + + TinyIntVector sortedVec = + (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + sortedVec.allocateNew(vec.getValueCount()); + sortedVec.setValueCount(vec.getValueCount()); + + sorter.sortOutOfPlace(vec, sortedVec, comparator); + + // verify results + Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount()); + + assertTrue(sortedVec.isNull(0)); + assertTrue(sortedVec.isNull(1)); + Assert.assertEquals((byte) 2, sortedVec.get(2)); + Assert.assertEquals((byte) 8, sortedVec.get(3)); + Assert.assertEquals((byte) 10, sortedVec.get(4)); + Assert.assertEquals((byte) 10, sortedVec.get(5)); + Assert.assertEquals((byte) 12, 
sortedVec.get(6)); + Assert.assertEquals((byte) 17, sortedVec.get(7)); + Assert.assertEquals((byte) 23, sortedVec.get(8)); + Assert.assertEquals((byte) 35, sortedVec.get(9)); + + sortedVec.close(); + } + } + + @Test + public void testSortShort() { + try (SmallIntVector vec = new SmallIntVector("", allocator)) { + vec.allocateNew(10); + vec.setValueCount(10); + + // fill data to sort + vec.set(0, 10); + vec.set(1, 8); + vec.setNull(2); + vec.set(3, 10); + vec.set(4, 12); + vec.set(5, 17); + vec.setNull(6); + vec.set(7, 23); + vec.set(8, 35); + vec.set(9, 2); + + // sort the vector + FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + + SmallIntVector sortedVec = + (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + sortedVec.allocateNew(vec.getValueCount()); + sortedVec.setValueCount(vec.getValueCount()); + + sorter.sortOutOfPlace(vec, sortedVec, comparator); + + // verify results + Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount()); + + assertTrue(sortedVec.isNull(0)); + assertTrue(sortedVec.isNull(1)); + Assert.assertEquals((short) 2, sortedVec.get(2)); + Assert.assertEquals((short) 8, sortedVec.get(3)); + Assert.assertEquals((short) 10, sortedVec.get(4)); + Assert.assertEquals((short) 10, sortedVec.get(5)); + Assert.assertEquals((short) 12, sortedVec.get(6)); + Assert.assertEquals((short) 17, sortedVec.get(7)); + Assert.assertEquals((short) 23, sortedVec.get(8)); + Assert.assertEquals((short) 35, sortedVec.get(9)); + + sortedVec.close(); + } + } + @Test public void testSortInt() { try (IntVector vec = new IntVector("", allocator)) { @@ -65,7 +164,7 @@ public void testSortInt() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); @@ -90,4 +189,142 @@ public void testSortInt() { sortedVec.close(); } } + + @Test + public void testSortLong() { + try (BigIntVector vec = new BigIntVector("", allocator)) { + vec.allocateNew(10); + vec.setValueCount(10); + + // fill data to sort + vec.set(0, 10L); + vec.set(1, 8L); + vec.setNull(2); + vec.set(3, 10L); + vec.set(4, 12L); + vec.set(5, 17L); + vec.setNull(6); + vec.set(7, 23L); + vec.set(8, 1L << 35L); + vec.set(9, 2L); + + // sort the vector + FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + + BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + sortedVec.allocateNew(vec.getValueCount()); + sortedVec.setValueCount(vec.getValueCount()); + + sorter.sortOutOfPlace(vec, sortedVec, comparator); + + // verify results + Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount()); + + assertTrue(sortedVec.isNull(0)); + assertTrue(sortedVec.isNull(1)); + Assert.assertEquals(2L, sortedVec.get(2)); + Assert.assertEquals(8L, sortedVec.get(3)); + Assert.assertEquals(10L, sortedVec.get(4)); + Assert.assertEquals(10L, sortedVec.get(5)); + Assert.assertEquals(12L, sortedVec.get(6)); + 
Assert.assertEquals(17L, sortedVec.get(7));
+      Assert.assertEquals(23L, sortedVec.get(8));
+      Assert.assertEquals(1L << 35L, sortedVec.get(9));
+
+      sortedVec.close();
+    }
+  }
+
+  @Test
+  public void testSortFloat() {
+    try (Float4Vector vec = new Float4Vector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      // fill data to sort
+      vec.set(0, 10f);
+      vec.set(1, 8f);
+      vec.setNull(2);
+      vec.set(3, 10f);
+      vec.set(4, 12f);
+      vec.set(5, 17f);
+      vec.setNull(6);
+      vec.set(7, 23f);
+      vec.set(8, Float.NaN);
+      vec.set(9, 2f);
+
+      // sort the vector
+      FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+      VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+      Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+      sortedVec.allocateNew(vec.getValueCount());
+      sortedVec.setValueCount(vec.getValueCount());
+
+      sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+      // verify results
+      Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+      assertTrue(sortedVec.isNull(0));
+      assertTrue(sortedVec.isNull(1));
+      Assert.assertEquals(2f, sortedVec.get(2), 0f);
+      Assert.assertEquals(8f, sortedVec.get(3), 0f);
+      Assert.assertEquals(10f, sortedVec.get(4), 0f);
+      Assert.assertEquals(10f, sortedVec.get(5), 0f);
+      Assert.assertEquals(12f, sortedVec.get(6), 0f);
+      Assert.assertEquals(17f, sortedVec.get(7), 0f);
+      Assert.assertEquals(23f, sortedVec.get(8), 0f);
+      Assert.assertEquals(Float.NaN, sortedVec.get(9), 0f);
+
+      sortedVec.close();
+    }
+  }
+
+  @Test
+  public void testSortDouble() {
+    try (Float8Vector vec = new Float8Vector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      // fill data to sort
+      vec.set(0, 10);
+      vec.set(1, 8);
+      vec.setNull(2);
+      vec.set(3, 10);
+      vec.set(4, 12);
+      vec.set(5, 17);
+      vec.setNull(6);
+      vec.set(7, Double.NaN);
+      vec.set(8, 35);
+      vec.set(9, 2);
+
+      // sort the vector
+      FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+      VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+
+      Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+      sortedVec.allocateNew(vec.getValueCount());
+      sortedVec.setValueCount(vec.getValueCount());
+
+      sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+      // verify results
+      Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+      assertTrue(sortedVec.isNull(0));
+      assertTrue(sortedVec.isNull(1));
+      Assert.assertEquals(2, sortedVec.get(2), 0);
+      Assert.assertEquals(8, sortedVec.get(3), 0);
+      Assert.assertEquals(10, sortedVec.get(4), 0);
+      Assert.assertEquals(10, sortedVec.get(5), 0);
+      Assert.assertEquals(12, sortedVec.get(6), 0);
+      Assert.assertEquals(17, sortedVec.get(7), 0);
+      Assert.assertEquals(35, sortedVec.get(8), 0);
+      Assert.assertEquals(Double.NaN, sortedVec.get(9), 0);
+
+      sortedVec.close();
+    }
+  }
 }
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
index 68be2549de4..46b30602177 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
@@ -22,6 +22,7 @@ import
org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.VarCharVector; import org.junit.After; import org.junit.Assert; @@ -65,7 +66,8 @@ public void testSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator(); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); VarCharVector sortedVec = (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 340c428b81a..6d8cb5ce12b 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -11,7 +11,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT arrow-flight diff --git a/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java b/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java index ee45cef24d3..e805917cd8f 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/FlightService.java @@ -191,6 +191,7 @@ public StreamObserver doPutCustom(final StreamObserver { @@ -53,11 +66,11 @@ private static class AuthObserver implements StreamObserver { private volatile StreamObserver responseObserver; private final LinkedBlockingQueue messages = new LinkedBlockingQueue<>(); private final AuthSender sender = new AuthSender(); - private volatile boolean completed = false; - private Throwable ex = null; + private CompletableFuture completed; public AuthObserver() { super(); + completed = new CompletableFuture<>(); } @Override @@ -72,7 +85,7 @@ public void onNext(HandshakeResponse value) { @Override public byte[] next() { - while (ex == null && (!completed || !messages.isEmpty())) { + while (!completed.isDone() || !messages.isEmpty()) { byte[] bytes = messages.poll(); if (bytes == null) { // busy wait. 
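(The shape of the new completion handling in this file: a single CompletableFuture replaces the old volatile completed flag plus raw Throwable field, so "finished" and "failed" become one state. A condensed sketch of the pattern with hypothetical class names, not Flight's actual types.)

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentLinkedQueue;

class HandshakeState {
  private final CompletableFuture<Boolean> completed = new CompletableFuture<>();
  private final ConcurrentLinkedQueue<byte[]> messages = new ConcurrentLinkedQueue<>();

  void onMessage(byte[] payload) {
    messages.add(payload);
  }

  void onError(Throwable t) {
    completed.completeExceptionally(t); // one object carries both "done" and "failed"
  }

  void onCompleted() {
    completed.complete(true);
  }

  byte[] poll() {
    // drain pending messages even after completion; fail fast on error
    while (!completed.isDone() || !messages.isEmpty()) {
      byte[] bytes = messages.poll();
      if (bytes != null) {
        return bytes;
      }
    }
    if (completed.isCompletedExceptionally()) {
      completed.join(); // throws CompletionException wrapping the original cause
    }
    return null;
  }
}
```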
@@ -82,8 +95,19 @@ public byte[] next() { } } - if (ex != null) { - throw Throwables.propagate(ex); + if (completed.isCompletedExceptionally()) { + // Preserve prior exception behavior + // TODO: with ARROW-5681, throw an appropriate Flight exception if gRPC raised an exception + try { + completed.get(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } catch (ExecutionException e) { + if (e.getCause() instanceof StatusRuntimeException) { + throw (StatusRuntimeException) e.getCause(); + } + throw new RuntimeException(e); + } } throw new IllegalStateException("You attempted to retrieve messages after there were none."); @@ -97,11 +121,13 @@ public boolean hasNext() { @Override public void onError(Throwable t) { - ex = t; + completed.completeExceptionally(t); } private class AuthSender implements ClientAuthSender { + private boolean errored = false; + @Override public void send(byte[] payload) { responseObserver.onNext(HandshakeRequest.newBuilder() @@ -111,6 +137,8 @@ public void send(byte[] payload) { @Override public void onError(String message, Throwable cause) { + this.errored = true; + Objects.requireNonNull(cause); responseObserver.onError(cause); } @@ -118,7 +146,7 @@ public void onError(String message, Throwable cause) { @Override public void onCompleted() { - completed = true; + completed.complete(true); } } diff --git a/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java b/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java index a19126b6ae9..0507d3b72fd 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthHandler.java @@ -36,6 +36,8 @@ public interface ServerAuthHandler { /** * Handle the initial handshake with the client. * + * @param outgoing A writer to send messages to the client. + * @param incoming An iterator of messages from the client. * @return true if client is authenticated, false otherwise. */ boolean authenticate(ServerAuthSender outgoing, Iterator incoming); diff --git a/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java b/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java index f0c5dae757a..a3c698b53bf 100644 --- a/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java +++ b/java/flight/src/main/java/org/apache/arrow/flight/auth/ServerAuthWrapper.java @@ -58,6 +58,7 @@ public static StreamObserver wrapHandshake(ServerAuthHandler a responseObserver.onError(Status.PERMISSION_DENIED.asException()); } catch (Exception ex) { + ex.printStackTrace(); responseObserver.onError(ex); } }; @@ -109,6 +110,7 @@ public boolean hasNext() { @Override public void onError(Throwable t) { + completed = true; while (future == null) {/* busy wait */} future.cancel(true); } diff --git a/java/flight/src/test/java/org/apache/arrow/flight/TestAuth.java b/java/flight/src/test/java/org/apache/arrow/flight/TestAuth.java new file mode 100644 index 00000000000..bfaf660b26b --- /dev/null +++ b/java/flight/src/test/java/org/apache/arrow/flight/TestAuth.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.flight; + +import java.util.Iterator; +import java.util.Optional; + +import org.apache.arrow.flight.auth.ClientAuthHandler; +import org.apache.arrow.flight.auth.ServerAuthHandler; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; + +import org.junit.Test; + +public class TestAuth { + + /** An auth handler that does not send messages should not block the server forever. */ + @Test(expected = RuntimeException.class) + public void noMessages() throws Exception { + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final FlightServer s = FlightTestUtil + .getStartedServer( + location -> FlightServer.builder(allocator, location, new NoOpFlightProducer()).authHandler( + new OneshotAuthHandler()).build()); + final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { + client.authenticate(new ClientAuthHandler() { + @Override + public void authenticate(ClientAuthSender outgoing, Iterator incoming) { + } + + @Override + public byte[] getCallToken() { + return new byte[0]; + } + }); + } + } + + /** An auth handler that sends an error should not block the server forever. 
*/ + @Test(expected = RuntimeException.class) + public void clientError() throws Exception { + try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + final FlightServer s = FlightTestUtil + .getStartedServer( + location -> FlightServer.builder(allocator, location, new NoOpFlightProducer()).authHandler( + new OneshotAuthHandler()).build()); + final FlightClient client = FlightClient.builder(allocator, s.getLocation()).build()) { + client.authenticate(new ClientAuthHandler() { + @Override + public void authenticate(ClientAuthSender outgoing, Iterator incoming) { + outgoing.send(new byte[0]); + // Ensure the server-side runs + incoming.next(); + outgoing.onError("test", new RuntimeException("test")); + } + + @Override + public byte[] getCallToken() { + return new byte[0]; + } + }); + } + } + + private static class OneshotAuthHandler implements ServerAuthHandler { + + @Override + public Optional isValid(byte[] token) { + return Optional.of("test"); + } + + @Override + public boolean authenticate(ServerAuthSender outgoing, Iterator incoming) { + incoming.next(); + outgoing.send(new byte[0]); + return false; + } + } +} diff --git a/java/flight/src/test/java/org/apache/arrow/flight/TestServerOptions.java b/java/flight/src/test/java/org/apache/arrow/flight/TestServerOptions.java index e3ac3908941..c745de912f3 100644 --- a/java/flight/src/test/java/org/apache/arrow/flight/TestServerOptions.java +++ b/java/flight/src/test/java/org/apache/arrow/flight/TestServerOptions.java @@ -38,6 +38,9 @@ public void domainSocket() throws Exception { Assume.assumeTrue("We have a native transport available", FlightTestUtil.isNativeTransportAvailable()); final File domainSocket = File.createTempFile("flight-unit-test-", ".sock"); Assert.assertTrue(domainSocket.delete()); + // Domain socket paths have a platform-dependent limit. Set a conservative limit and skip the test if the temporary + // file name is too long. (We do not assume a particular platform-dependent temporary directory path.) 
+ Assume.assumeTrue("The domain socket path is not too long", domainSocket.getAbsolutePath().length() < 100); final Location location = Location.forGrpcDomainSocket(domainSocket.getAbsolutePath()); try ( BufferAllocator a = new RootAllocator(Long.MAX_VALUE); diff --git a/java/flight/src/test/java/org/apache/arrow/flight/auth/TestAuth.java b/java/flight/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java similarity index 99% rename from java/flight/src/test/java/org/apache/arrow/flight/auth/TestAuth.java rename to java/flight/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java index 54bbadb0369..9fe6b04140c 100644 --- a/java/flight/src/test/java/org/apache/arrow/flight/auth/TestAuth.java +++ b/java/flight/src/test/java/org/apache/arrow/flight/auth/TestBasicAuth.java @@ -48,7 +48,7 @@ import io.grpc.StatusRuntimeException; -public class TestAuth { +public class TestBasicAuth { final String PERMISSION_DENIED = "PERMISSION_DENIED"; private static final String USERNAME = "flight"; diff --git a/java/format/pom.xml b/java/format/pom.xml index a92ff1aebed..07902e8a899 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 0.14.0 + 1.0.0-SNAPSHOT arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index b825e554c0d..02734d6423f 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT org.apache.arrow.gandiva diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java index ef1d63ae29b..520ef5f443e 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniWrapper.java @@ -48,6 +48,7 @@ native long buildProjector(byte[] schemaBuf, byte[] exprListBuf, * Evaluate the expressions represented by the moduleId on a record batch * and store the output in ValueVectors. Throws an exception in case of errors * + * @param expander VectorExpander object. Used for callbacks from cpp. * @param moduleId moduleId representing expressions. Created using a call to * buildNativeCode * @param numRows Number of rows in the record batch @@ -61,7 +62,7 @@ native long buildProjector(byte[] schemaBuf, byte[] exprListBuf, * @param outSizes The allocated size of the output buffers. 
On successful evaluation,
   *                 the result is stored in the output buffers
   */
-  native void evaluateProjector(long moduleId, int numRows,
+  native void evaluateProjector(Object expander, long moduleId, int numRows,
                                 long[] bufAddrs, long[] bufSizes,
                                 int selectionVectorType, int selectionVectorSize,
                                 long selectionVectorBufferAddr, long selectionVectorBufferSize,
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
index ae93fba5991..c15d474a282 100644
--- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java
@@ -27,8 +27,10 @@
 import org.apache.arrow.gandiva.expression.ExpressionTree;
 import org.apache.arrow.gandiva.ipc.GandivaTypes;
 import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
+import org.apache.arrow.vector.BaseVariableWidthVector;
 import org.apache.arrow.vector.FixedWidthVector;
 import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VariableWidthVector;
 import org.apache.arrow.vector.ipc.message.ArrowBuffer;
 import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
 import org.apache.arrow.vector.types.pojo.Schema;
@@ -235,26 +237,43 @@ private void evaluate(int numRows, List<ArrowBuf> buffers, List<ArrowBuffer> bufLayouts)
       bufSizes[idx++] = bufLayout.getSize();
     }
 
-    long[] outAddrs = new long[2 * outColumns.size()];
-    long[] outSizes = new long[2 * outColumns.size()];
+    boolean hasVariableWidthColumns = false;
+    BaseVariableWidthVector[] resizableVectors = new BaseVariableWidthVector[outColumns.size()];
+    long[] outAddrs = new long[3 * outColumns.size()];
+    long[] outSizes = new long[3 * outColumns.size()];
 
     idx = 0;
+    int outColumnIdx = 0;
     for (ValueVector valueVector : outColumns) {
-      if (!(valueVector instanceof FixedWidthVector)) {
-        throw new UnsupportedTypeException("Unsupported value vector type");
+      boolean isFixedWidth = valueVector instanceof FixedWidthVector;
+      boolean isVarWidth = valueVector instanceof VariableWidthVector;
+      if (!isFixedWidth && !isVarWidth) {
+        throw new UnsupportedTypeException(
+            "Unsupported value vector type " + valueVector.getField().getFieldType());
       }
 
       outAddrs[idx] = valueVector.getValidityBuffer().memoryAddress();
       outSizes[idx++] = valueVector.getValidityBuffer().capacity();
+      if (isVarWidth) {
+        outAddrs[idx] = valueVector.getOffsetBuffer().memoryAddress();
+        outSizes[idx++] = valueVector.getOffsetBuffer().capacity();
+        hasVariableWidthColumns = true;
+
+        // save the vector to allow for resizing.
+        resizableVectors[outColumnIdx] = (BaseVariableWidthVector) valueVector;
+      }
       outAddrs[idx] = valueVector.getDataBuffer().memoryAddress();
       outSizes[idx++] = valueVector.getDataBuffer().capacity();
 
       valueVector.setValueCount(selectionVectorRecordCount);
+      outColumnIdx++;
     }
 
-    wrapper.evaluateProjector(this.moduleId, numRows, bufAddrs, bufSizes,
-        selectionVectorType, selectionVectorRecordCount,
-        selectionVectorAddr, selectionVectorSize,
-        outAddrs, outSizes);
+    wrapper.evaluateProjector(
+        hasVariableWidthColumns ? new VectorExpander(resizableVectors) : null,
+        this.moduleId, numRows, bufAddrs, bufSizes,
+        selectionVectorType, selectionVectorRecordCount,
+        selectionVectorAddr, selectionVectorSize,
+        outAddrs, outSizes);
   }
 
   /**
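(For each output column the loop above now reserves up to three address/size slots instead of two: variable-width vectors contribute validity, offsets, and data buffers, fixed-width ones only validity and data. A sketch of the resulting layout using the same getters the patch calls; the helper name is illustrative.)

```java
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VariableWidthVector;

final class OutputBufferLayout {
  // Addresses handed to the JNI call, in the order the evaluate() loop
  // emits them (the real code also records capacities in outSizes).
  static long[] addressesFor(ValueVector v) {
    if (v instanceof VariableWidthVector) {
      return new long[] {
          v.getValidityBuffer().memoryAddress(),
          v.getOffsetBuffer().memoryAddress(), // extra slot for offsets
          v.getDataBuffer().memoryAddress()
      };
    }
    return new long[] {
        v.getValidityBuffer().memoryAddress(),
        v.getDataBuffer().memoryAddress()
    };
  }
}
```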
diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java
new file mode 100644
index 00000000000..2414144a853
--- /dev/null
+++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/VectorExpander.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.vector.BaseVariableWidthVector;
+
+/**
+ * This class provides the functionality to expand output vectors using a callback mechanism from
+ * Gandiva.
+ */
+public class VectorExpander {
+  private final BaseVariableWidthVector[] vectors;
+
+  public VectorExpander(BaseVariableWidthVector[] vectors) {
+    this.vectors = vectors;
+  }
+
+  /**
+   * Result of vector expansion.
+   */
+  public static class ExpandResult {
+    public long address;
+    public int capacity;
+
+    public ExpandResult(long address, int capacity) {
+      this.address = address;
+      this.capacity = capacity;
+    }
+  }
+
+  /**
+   * Expand the vector at the specified index. This is used as a callback from JNI, and is only
+   * relevant for variable-width vectors.
+   *
+   * @param index index of the buffer in the list passed to JNI.
+   * @param toCapacity the size to which the buffer should be expanded.
+   *
+   * @return address and size of the buffer after expansion.
+   */
+  public ExpandResult expandOutputVectorAtIndex(int index, int toCapacity) {
+    if (index >= vectors.length || vectors[index] == null) {
+      throw new IllegalArgumentException("invalid index " + index);
+    }
+
+    BaseVariableWidthVector vector = vectors[index];
+    while (vector.getDataBuffer().capacity() < toCapacity) {
+      vector.reallocDataBuffer();
+    }
+    return new ExpandResult(
+        vector.getDataBuffer().memoryAddress(),
+        vector.getDataBuffer().capacity());
+  }
+
+}
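(User code never calls VectorExpander directly; the C++ side invokes it through JNI when an output data buffer runs out of room. Its contract can still be exercised in isolation; a sketch with arbitrarily chosen sizes.)

```java
import org.apache.arrow.gandiva.evaluator.VectorExpander;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.VarCharVector;

public class ExpanderExample {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(1024 * 1024);
         VarCharVector out = new VarCharVector("out", allocator)) {
      out.allocateNew(16, 4); // deliberately tiny data buffer

      // index 0 = this vector's position in the array handed to JNI
      VectorExpander expander = new VectorExpander(new BaseVariableWidthVector[] {out});
      VectorExpander.ExpandResult result = expander.expandOutputVectorAtIndex(0, 1024);

      // the data buffer has been doubled until it can hold 1024 bytes
      System.out.println(result.capacity + " bytes at 0x" + Long.toHexString(result.address));
    }
  }
}
```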
+ */ + public ExpandResult expandOutputVectorAtIndex(int index, int toCapacity) { + if (index >= vectors.length || vectors[index] == null) { + throw new IllegalArgumentException("invalid index " + index); + } + + BaseVariableWidthVector vector = vectors[index]; + while (vector.getDataBuffer().capacity() < toCapacity) { + vector.reallocDataBuffer(); + } + return new ExpandResult( + vector.getDataBuffer().memoryAddress(), + vector.getDataBuffer().capacity()); + } + +} diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java index 47d4eb74c86..c37a19012f1 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/ArrowTypeHelper.java @@ -23,6 +23,7 @@ import org.apache.arrow.gandiva.exceptions.GandivaException; import org.apache.arrow.gandiva.exceptions.UnsupportedTypeException; import org.apache.arrow.gandiva.ipc.GandivaTypes; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; @@ -114,6 +115,9 @@ private static void initArrowTypeFloat( private static void initArrowTypeDecimal(ArrowType.Decimal decimalType, GandivaTypes.ExtGandivaType.Builder builder) { + Preconditions.checkArgument(decimalType.getPrecision() > 0 && + decimalType.getPrecision() <= 38, "Gandiva only supports decimals of upto 38 " + + "precision. Input precision : " + decimalType.getPrecision()); builder.setPrecision(decimalType.getPrecision()); builder.setScale(decimalType.getScale()); builder.setType(GandivaTypes.GandivaType.DECIMAL); diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java index 9384cd4d363..c774b0450e0 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/BaseEvaluatorTest.java @@ -32,6 +32,7 @@ import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.FloatingPointPrecision; @@ -243,6 +244,17 @@ DecimalVector decimalVector(String[] values, int precision, int scale) { return vector; } + VarCharVector varcharVector(String[] values) { + VarCharVector vector = new VarCharVector("VarCharVector" + Math.random(), allocator); + vector.allocateNew(); + for (int i = 0; i < values.length; i++) { + vector.setSafe(i, values[i].getBytes(), 0, values[i].length()); + } + + vector.setValueCount(values.length); + return vector; + } + ArrowBuf longBuf(long[] longs) { ArrowBuf buffer = allocator.buffer(longs.length * 8); for (int i = 0; i < longs.length; i++) { diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java index 63a7536bce0..99ae5e466d4 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java +++ 
b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistryTest.java @@ -43,4 +43,14 @@ public void testFunctions() throws GandivaException { Set functions = ExpressionRegistry.getInstance().getSupportedFunctions(); Assert.assertTrue(functions.contains(signature)); } + + @Test + public void testFunctionAliases() throws GandivaException { + ArrowType.Int int64 = new ArrowType.Int(64, true); + FunctionSignature signature = + new FunctionSignature("modulo", int64, Lists.newArrayList(int64, int64)); + Set functions = ExpressionRegistry.getInstance().getSupportedFunctions(); + Assert.assertTrue(functions.contains(signature)); + } + } diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java index aaacffdd3dc..37cc49be71e 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorDecimalTest.java @@ -35,17 +35,22 @@ import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import com.google.common.collect.Lists; public class ProjectorDecimalTest extends org.apache.arrow.gandiva.evaluator.BaseEvaluatorTest { + @Rule + public ExpectedException exception = ExpectedException.none(); @Test public void test_add() throws GandivaException { @@ -528,8 +533,8 @@ public void testCastToLong() throws GandivaException { List output = null; ArrowRecordBatch batch = null; try { - int numRows = 4; - String[] aValues = new String[]{"1.23", "1.58", "-1.23", "-1.58"}; + int numRows = 5; + String[] aValues = new String[]{"1.23", "1.50", "98765.78", "-1.23", "-1.58"}; DecimalVector valuesa = decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale()); batch = new ArrowRecordBatch( numRows, @@ -551,7 +556,7 @@ public void testCastToLong() throws GandivaException { eval.evaluate(batch, output); // compare the outputs. 
- long[] expected = {1, 1, -1, -1}; + long[] expected = {1, 2, 98766, -1, -2}; for (int i = 0; i < numRows; i++) { assertFalse(resultVector.isNull(i)); assertEquals(expected[i], resultVector.get(i)); @@ -625,4 +630,168 @@ public void testCastToDouble() throws GandivaException { eval.close(); } } + + @Test + public void testCastToString() throws GandivaException { + Decimal decimalType = new Decimal(38, 2); + Field dec = Field.nullable("dec", decimalType); + Field str = Field.nullable("str", new ArrowType.Utf8()); + TreeNode field = TreeBuilder.makeField(dec); + TreeNode literal = TreeBuilder.makeLiteral(5L); + List args = Lists.newArrayList(field, literal); + TreeNode cast = TreeBuilder.makeFunction("castVARCHAR", args, new ArrowType.Utf8()); + TreeNode root = TreeBuilder.makeFunction("equal", + Lists.newArrayList(cast, TreeBuilder.makeField(str)), new ArrowType.Bool()); + ExpressionTree tree = TreeBuilder.makeExpression(root, Field.nullable("are_equal", new ArrowType.Bool())); + + Schema schema = new Schema(Lists.newArrayList(dec, str)); + Projector eval = Projector.make(schema, Lists.newArrayList(tree) + ); + + List output = null; + ArrowRecordBatch batch = null; + try { + int numRows = 4; + String[] aValues = new String[]{"10.51", "100.23", "-1000.23", "-0000.10"}; + String[] expected = {"10.51", "100.2", "-1000", "-0.10"}; + DecimalVector valuesa = decimalVector(aValues, decimalType.getPrecision(), decimalType.getScale()); + VarCharVector result = varcharVector(expected); + batch = new ArrowRecordBatch( + numRows, + Lists.newArrayList( + new ArrowFieldNode(numRows, 0) + ), + Lists.newArrayList( + valuesa.getValidityBuffer(), + valuesa.getDataBuffer(), + result.getValidityBuffer(), + result.getOffsetBuffer(), + result.getDataBuffer() + ) + ); + + BitVector resultVector = new BitVector("res", allocator); + resultVector.allocateNew(); + output = new ArrayList<>(Arrays.asList(resultVector)); + + // evaluate expressions. + eval.evaluate(batch, output); + + // compare the outputs. 
+ for (int i = 0; i < numRows; i++) { + assertTrue(resultVector.getObject(i).booleanValue()); + } + } finally { + // free buffers + if (batch != null) { + releaseRecordBatch(batch); + } + if (output != null) { + releaseValueVectors(output); + } + eval.close(); + } + } + + @Test + public void testCastStringToDecimal() throws GandivaException { + Decimal decimalType = new Decimal(4, 2); + Field dec = Field.nullable("dec", decimalType); + + Field str = Field.nullable("str", new ArrowType.Utf8()); + TreeNode field = TreeBuilder.makeField(str); + List args = Lists.newArrayList(field); + TreeNode cast = TreeBuilder.makeFunction("castDECIMAL", args, decimalType); + ExpressionTree tree = TreeBuilder.makeExpression(cast, Field.nullable("dec_str", decimalType)); + + Schema schema = new Schema(Lists.newArrayList(str)); + Projector eval = Projector.make(schema, Lists.newArrayList(tree) + ); + + List output = null; + ArrowRecordBatch batch = null; + try { + int numRows = 4; + String[] aValues = new String[]{"10.5134", "-0.1", "10.516", "-1000"}; + VarCharVector valuesa = varcharVector(aValues); + batch = new ArrowRecordBatch( + numRows, + Lists.newArrayList( + new ArrowFieldNode(numRows, 0) + ), + Lists.newArrayList( + valuesa.getValidityBuffer(), + valuesa.getOffsetBuffer(), + valuesa.getDataBuffer() + ) + ); + + DecimalVector resultVector = new DecimalVector("res", allocator, + decimalType.getPrecision(), decimalType.getScale()); + resultVector.allocateNew(); + output = new ArrayList<>(Arrays.asList(resultVector)); + + BigDecimal[] expected = {BigDecimal.valueOf(10.51), BigDecimal.valueOf(-0.10), + BigDecimal.valueOf(10.52), BigDecimal.valueOf(0.00)}; + // evaluate expressions. + eval.evaluate(batch, output); + + // compare the outputs. + for (int i = 0; i < numRows; i++) { + assertTrue("mismatch in result for " + + "field " + resultVector.getField().getName() + + " for row " + i + + " expected " + expected[i] + + ", got " + resultVector.getObject(i),expected[i].compareTo(resultVector.getObject(i)) == 0); + } + } finally { + // free buffers + if (batch != null) { + releaseRecordBatch(batch); + } + if (output != null) { + releaseValueVectors(output); + } + eval.close(); + } + } + + @Test + public void testInvalidDecimal() throws GandivaException { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Gandiva only supports decimals of upto 38 precision. Input precision" + + " : 0"); + Decimal decimalType = new Decimal(0, 0); + Field int64f = Field.nullable("int64", int64); + + Schema schema = new Schema(Lists.newArrayList(int64f)); + Projector eval = Projector.make(schema, + Lists.newArrayList( + TreeBuilder.makeExpression("castDECIMAL", + Lists.newArrayList(int64f), + Field.nullable("invalid_dec", decimalType) + ) + ) + ); + } + + @Test + public void testInvalidDecimalGt38() throws GandivaException { + exception.expect(IllegalArgumentException.class); + exception.expectMessage("Gandiva only supports decimals of upto 38 precision. 
Input precision" + + " : 42"); + Decimal decimalType = new Decimal(42, 0); + Field int64f = Field.nullable("int64", int64); + + Schema schema = new Schema(Lists.newArrayList(int64f)); + Projector eval = Projector.make(schema, + Lists.newArrayList( + TreeBuilder.makeExpression("castDECIMAL", + Lists.newArrayList(int64f), + Field.nullable("invalid_dec", decimalType) + ) + ) + ); + } } + diff --git a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java index 62a12710cc7..52eeb165a4d 100644 --- a/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java +++ b/java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java @@ -38,6 +38,7 @@ import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.DateUnit; @@ -48,7 +49,9 @@ import org.junit.Assert; import org.junit.Ignore; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -60,6 +63,9 @@ public class ProjectorTest extends BaseEvaluatorTest { private Charset utf8Charset = Charset.forName("UTF-8"); private Charset utf16Charset = Charset.forName("UTF-16"); + @Rule + public ExpectedException thrown = ExpectedException.none(); + List varBufs(String[] strings, Charset charset) { ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4); ArrowBuf dataBuffer = allocator.buffer(strings.length * 8); @@ -516,6 +522,72 @@ public void testStringFields() throws GandivaException { eval.close(); } + @Test + public void testStringOutput() throws GandivaException { + /* + * if (x >= 0) "hi" else "bye" + */ + + Field x = Field.nullable("x", new ArrowType.Int(32, true)); + + ArrowType retType = new ArrowType.Utf8(); + + TreeNode ifHiBye = TreeBuilder.makeIf( + TreeBuilder.makeFunction( + "greater_than_or_equal_to", + Lists.newArrayList( + TreeBuilder.makeField(x), + TreeBuilder.makeLiteral(0) + ), + boolType), + TreeBuilder.makeStringLiteral("hi"), + TreeBuilder.makeStringLiteral("bye"), + retType); + + ExpressionTree expr = TreeBuilder.makeExpression(ifHiBye, Field.nullable("res", retType)); + Schema schema = new Schema(Lists.newArrayList(x)); + Projector eval = Projector.make(schema, Lists.newArrayList(expr)); + + // fill up input record batch + int numRows = 4; + byte[] validity = new byte[]{(byte) 255, 0}; + int[] xValues = new int[]{10, -10, 20, -20}; + String[] expected = new String[]{"hi", "bye", "hi", "bye"}; + ArrowBuf validityX = buf(validity); + ArrowBuf dataX = intBuf(xValues); + ArrowRecordBatch batch = + new ArrowRecordBatch( + numRows, + Lists.newArrayList(new ArrowFieldNode(numRows, 0)), + Lists.newArrayList( validityX, dataX)); + + // allocate data for output vector. + VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); + outVector.allocateNew(64, numRows); + + + // evaluate expression + List output = new ArrayList<>(); + output.add(outVector); + eval.evaluate(batch, output); + + // match expected output. 
+ for (int i = 0; i < numRows; i++) { + assertFalse(outVector.isNull(i)); + assertEquals(expected[i], new String(outVector.get(i))); + } + + // test with insufficient data buffer. + try { + outVector.allocateNew(4, numRows); + eval.evaluate(batch, output); + } finally { + releaseRecordBatch(batch); + releaseValueVectors(output); + eval.close(); + } + } + @Test public void testRegex() throws GandivaException { /* diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 745c395f817..048d3f91d2c 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT arrow-memory Arrow Memory diff --git a/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java b/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java index cb23c2eaa27..2699b6a464f 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java @@ -18,7 +18,7 @@ package org.apache.arrow.memory; /** - * Helper Iface to generify a value to be included in the map where + * Helper interface to generify a value to be included in the map where * key is part of the value. * * @param The type of the key. diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 5e1d04df94a..0b42d07880b 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -14,7 +14,7 @@ arrow-java-root org.apache.arrow - 0.14.0 + 1.0.0-SNAPSHOT arrow-performance jar diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java new file mode 100644 index 00000000000..e7a0b20c35c --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import io.netty.buffer.ArrowBuf; + +/** + * Benchmarks for {@link BitVectorHelper}. 
+ */ +@State(Scope.Benchmark) +public class BitVectorHelperBenchmarks { + + private static final int VALIDITY_BUFFER_CAPACITY = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private ArrowBuf validityBuffer; + + private ArrowBuf oneBitValidityBuffer; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + validityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); + + for (int i = 0;i < VALIDITY_BUFFER_CAPACITY; i++) { + if (i % 7 == 0) { + BitVectorHelper.setValidityBit(validityBuffer, i, (byte) 1); + } else { + BitVectorHelper.setValidityBit(validityBuffer, i, (byte) 0); + } + } + + // only one 1 bit in the middle of the buffer + oneBitValidityBuffer = allocator.buffer(VALIDITY_BUFFER_CAPACITY / 8); + oneBitValidityBuffer.setZero(0, VALIDITY_BUFFER_CAPACITY / 8); + BitVectorHelper.setValidityBit(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY / 2, (byte) 1); + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + validityBuffer.close(); + oneBitValidityBuffer.close(); + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public int getNullCountBenchmark() { + return BitVectorHelper.getNullCount(validityBuffer, VALIDITY_BUFFER_CAPACITY); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public boolean allBitsNullBenchmark() { + return BitVectorHelper.checkAllBitsEqualTo(oneBitValidityBuffer, VALIDITY_BUFFER_CAPACITY, true); + } + + //@Test + public static void main(String [] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(BitVectorHelperBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java new file mode 100644 index 00000000000..aaa8deb417b --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector; + +import java.math.BigDecimal; +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Test; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import io.netty.buffer.ArrowBuf; + +/** + * Benchmarks for {@link DecimalVector}. + */ +@State(Scope.Benchmark) +public class DecimalVectorBenchmarks { + + private static final int VECTOR_LENGTH = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private DecimalVector vector; + + private ArrowBuf fromBuf; + + byte[] fromByteArray; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + vector = new DecimalVector("vector", allocator, 38, 16); + vector.allocateNew(VECTOR_LENGTH); + + fromBuf = allocator.buffer(VECTOR_LENGTH * DecimalVector.TYPE_WIDTH); + for (int i = 0; i < VECTOR_LENGTH; i++) { + byte[] bytes = BigDecimal.valueOf(i).unscaledValue().toByteArray(); + fromBuf.setBytes(i * DecimalVector.TYPE_WIDTH, bytes); + } + + fromByteArray = new byte[DecimalVector.TYPE_WIDTH]; + fromBuf.getBytes(0, fromByteArray); + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + fromBuf.close(); + vector.close(); + allocator.close(); + } + + /** + * Test writing on {@link DecimalVector} from arrow buf. + */ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setBigEndianArrowBufBenchmark() { + int offset = 0; + + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setBigEndianSafe(i, offset, fromBuf, DecimalVector.TYPE_WIDTH); + offset += 8; + } + } + + /** + * Test writing on {@link DecimalVector} from byte array. + */ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void setBigEndianByteArrayBenchmark() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.setBigEndian(i, fromByteArray); + } + } + + @Test + public void evaluate() throws RunnerException { + Options opt = new OptionsBuilder() + .include(DecimalVectorBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java index 9ab6e375eaf..4617f5bf9bc 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java +++ b/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java @@ -50,6 +50,8 @@ public class Float8Benchmarks { private Float8Vector vector; + private Float8Vector fromVector; + /** * Setup benchmarks. 
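+   * (Sketch of the data shape: every third slot of {@code fromVector} is set to
+   * null and the rest hold {@code i * i}, so {@code copyFromBenchmark} below
+   * exercises both the null and the non-null copy paths.)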
*/ @@ -58,6 +60,18 @@ public void prepare() { allocator = new RootAllocator(ALLOCATOR_CAPACITY); vector = new Float8Vector("vector", allocator); vector.allocateNew(VECTOR_LENGTH); + + fromVector = new Float8Vector("vector", allocator); + fromVector.allocateNew(VECTOR_LENGTH); + + for (int i = 0;i < VECTOR_LENGTH; i++) { + if (i % 3 == 0) { + fromVector.setNull(i); + } else { + fromVector.set(i, i * i); + } + } + fromVector.setValueCount(VECTOR_LENGTH); } /** @@ -66,6 +80,7 @@ public void prepare() { @TearDown public void tearDown() { vector.close(); + fromVector.close(); allocator.close(); } @@ -88,6 +103,15 @@ public double readWriteBenchmark() { return sum; } + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void copyFromBenchmark() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.copyFrom(i, i, (Float8Vector) fromVector); + } + } + @Test public void evaluate() throws RunnerException { Options opt = new OptionsBuilder() diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java new file mode 100644 index 00000000000..39ff9c05a35 --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Test; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Benchmarks for {@link VarCharVector}. + */ +@State(Scope.Benchmark) +public class VarCharBenchmarks { + + private static final int VECTOR_LENGTH = 1024; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private VarCharVector vector; + + private VarCharVector fromVector; + + /** + * Setup benchmarks. 
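+   * (Here too every third slot of {@code fromVector} is null; non-null slots store
+   * the bytes of {@code String.valueOf(i * 1000)}, giving variable-length values
+   * for the copy benchmark.)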
+ */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + vector = new VarCharVector("vector", allocator); + vector.allocateNew(ALLOCATOR_CAPACITY / 4, VECTOR_LENGTH); + + fromVector = new VarCharVector("vector", allocator); + fromVector.allocateNew(ALLOCATOR_CAPACITY / 4, VECTOR_LENGTH); + + for (int i = 0;i < VECTOR_LENGTH; i++) { + if (i % 3 == 0) { + fromVector.setNull(i); + } else { + fromVector.set(i, String.valueOf(i * 1000).getBytes()); + } + } + fromVector.setValueCount(VECTOR_LENGTH); + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + vector.close(); + fromVector.close(); + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.MICROSECONDS) + public void copyFromBenchmark() { + for (int i = 0; i < VECTOR_LENGTH; i++) { + vector.copyFrom(i, i, fromVector); + } + } + + @Test + public void evaluate() throws RunnerException { + Options opt = new OptionsBuilder() + .include(VarCharBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java new file mode 100644 index 00000000000..047807c2ba1 --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.dictionary; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.junit.Test; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Benchmarks for {@link DictionaryEncoder}. 
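+ *
+ * <p>A minimal sketch of the call being measured, mirroring the setup in
+ * {@code prepare()} below:</p>
+ *
+ * <pre>{@code
+ * Dictionary dictionary = new Dictionary(dictionaryVector,
+ *     new DictionaryEncoding(1L, false, null));
+ * try (ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ *   // 'encoded' holds dictionary indices instead of the original values
+ * }
+ * }</pre>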
+ */ +@State(Scope.Benchmark) +public class DictionaryEncoderBenchmarks { + + private BufferAllocator allocator; + + private static final int DATA_SIZE = 1000; + private static final int KEY_SIZE = 100; + + + private static final int KEY_LENGTH = 10; + + private List keys = new ArrayList<>(); + + private VarCharVector vector; + + private VarCharVector dictionaryVector; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + + for (int i = 0; i < KEY_SIZE; i++) { + keys.add(generateUniqueKey(KEY_LENGTH)); + } + + allocator = new RootAllocator(10 * 1024 * 1024); + + vector = new VarCharVector("vector", allocator); + dictionaryVector = new VarCharVector("dict", allocator); + + vector.allocateNew(10240, DATA_SIZE); + vector.setValueCount(DATA_SIZE); + for (int i = 0; i < DATA_SIZE; i++) { + byte[] value = keys.get(generateRandomIndex(KEY_SIZE)).getBytes(StandardCharsets.UTF_8); + vector.setSafe(i, value, 0, value.length); + } + + dictionaryVector.allocateNew(1024, 100); + dictionaryVector.setValueCount(100); + for (int i = 0; i < KEY_SIZE; i++) { + byte[] value = keys.get(i).getBytes(StandardCharsets.UTF_8); + dictionaryVector.setSafe(i, value, 0, value.length); + } + + } + + /** + * Tear down benchmarks. + */ + @TearDown + public void tearDown() { + vector.close(); + dictionaryVector.close(); + keys.clear(); + allocator.close(); + } + + /** + * Test encode for {@link DictionaryEncoder}. + * @return useless. To avoid DCE by JIT. + */ + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public int testEncode() { + Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary); + encoded.close(); + return 0; + } + + private int generateRandomIndex(int max) { + Random random = new Random(); + return random.nextInt(max); + } + + private String generateUniqueKey(int length) { + String str = "abcdefghijklmnopqrstuvwxyz"; + Random random = new Random(); + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < length; i++) { + int number = random.nextInt(26); + sb.append(str.charAt(number)); + } + if (keys.contains(sb.toString())) { + return generateUniqueKey(length); + } + return sb.toString(); + } + + @Test + public void evaluate() throws RunnerException { + Options opt = new OptionsBuilder() + .include(DictionaryEncoderBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/ByteFunctionHelpersBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/util/ByteFunctionHelpersBenchmarks.java new file mode 100644 index 00000000000..6f380a8cd2d --- /dev/null +++ b/java/performance/src/test/java/org/apache/arrow/vector/util/ByteFunctionHelpersBenchmarks.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Test; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import io.netty.buffer.ArrowBuf; + +/** + * Benchmarks for {@link ByteFunctionHelpers}. + */ +@State(Scope.Benchmark) +public class ByteFunctionHelpersBenchmarks { + + private static final int BUFFER_CAPACITY = 7; + + private static final int ALLOCATOR_CAPACITY = 1024 * 1024; + + private BufferAllocator allocator; + + private ArrowBuf buffer1; + + private ArrowBuf buffer2; + + /** + * Setup benchmarks. + */ + @Setup + public void prepare() { + allocator = new RootAllocator(ALLOCATOR_CAPACITY); + buffer1 = allocator.buffer(BUFFER_CAPACITY); + buffer2 = allocator.buffer(BUFFER_CAPACITY); + + for (int i = 0; i < BUFFER_CAPACITY; i++) { + buffer1.setByte(i, i); + buffer2.setByte(i, i); + } + } + + /** + * Tear down benchmarks. 
+ */ + @TearDown + public void tearDown() { + buffer1.close(); + buffer2.close(); + allocator.close(); + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + @OutputTimeUnit(TimeUnit.NANOSECONDS) + public void equals() { + ByteFunctionHelpers.equal(buffer1, 0, BUFFER_CAPACITY - 1, buffer2, 0, BUFFER_CAPACITY - 1); + + } + + @Test + public void evaluate() throws RunnerException { + Options opt = new OptionsBuilder() + .include(ByteFunctionHelpersBenchmarks.class.getSimpleName()) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/java/plasma/pom.xml b/java/plasma/pom.xml index 232c98af686..294010fd254 100644 --- a/java/plasma/pom.xml +++ b/java/plasma/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT arrow-plasma Arrow Plasma Client diff --git a/java/pom.xml b/java/pom.xml index db3584e106b..c0985ffd8f3 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -20,7 +20,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT pom Apache Arrow Java Root POM @@ -46,7 +46,7 @@ scm:git:https://github.com/apache/arrow.git scm:git:https://github.com/apache/arrow.git https://github.com/apache/arrow - apache-arrow-0.14.0 + apache-arrow-0.14.1 @@ -655,6 +655,7 @@ flight performance algorithm + adapter/avro diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 81e9856104d..bcdeb11444b 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT arrow-tools Arrow Tools diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 4e62d48c0f9..6c313a678b1 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 0.14.0 + 1.0.0-SNAPSHOT arrow-vector Arrow Vectors diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 26f4673ee4f..b05005dad6a 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -17,6 +17,8 @@ import io.netty.buffer.ArrowBuf; import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.types.pojo.FieldType; <@pp.dropOutputFile /> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/UnionVector.java" /> @@ -74,14 +76,18 @@ public class UnionVector implements FieldVector { private int singleType = 0; private ValueVector singleVector; - private static final byte TYPE_WIDTH = 1; private final CallBack callBack; private int typeBufferAllocationSizeInBytes; + private static final byte TYPE_WIDTH = 1; + private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(false /*nullable*/, + ArrowType.Struct.INSTANCE, null /*dictionary*/, null /*metadata*/); + public UnionVector(String name, BufferAllocator allocator, CallBack callBack) { this.name = name; this.allocator = allocator; - this.internalStruct = new NonNullableStructVector("internal", allocator, new FieldType(false, ArrowType.Struct.INSTANCE, null, null), callBack); + this.internalStruct = new NonNullableStructVector("internal", allocator, INTERNAL_STRUCT_TYPE, + callBack); this.typeBuffer = allocator.getEmpty(); this.callBack = callBack; this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; @@ -486,32 +492,35 @@ public Iterator iterator() { return vectors.iterator(); } - - public Object getObject(int index) { + private ValueVector getVector(int index) { int type = 
typeBuffer.getByte(index * TYPE_WIDTH); switch (MinorType.values()[type]) { - case NULL: - return null; + case NULL: + return null; <#list vv.types as type> <#list type.minor as minor> <#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign uncappedName = name?uncap_first/> <#if !minor.typeParams?? > - case ${name?upper_case}: - return get${name}Vector().getObject(index); + case ${name?upper_case}: + return get${name}Vector(); - case STRUCT: - return getStruct().getObject(index); - case LIST: - return getList().getObject(index); - default: - throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[type]); + case STRUCT: + return getStruct(); + case LIST: + return getList(); + default: + throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[type]); } } + public Object getObject(int index) { + return getVector(index).getObject(index); + } + public byte[] get(int index) { return null; } @@ -617,4 +626,27 @@ public void setType(int index, MinorType type) { private int getTypeBufferValueCapacity() { return typeBuffer.capacity() / TYPE_WIDTH; } + + @Override + public int hashCode(int index) { + return getVector(index).hashCode(index); + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + if (to == null) { + return false; + } + if (this.getClass() != to.getClass()) { + return false; + } + UnionVector that = (UnionVector) to; + ValueVector leftVector = getVector(index); + ValueVector rightVector = that.getVector(toIndex); + + if (leftVector.getClass() != rightVector.getClass()) { + return false; + } + return leftVector.equals(index, rightVector, toIndex); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java index 30aadfbcabd..b41dbb245e8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java @@ -39,7 +39,7 @@ public V getVector() { return vector; } - /** Returns whether the vectos is created. */ + /** Returns whether the vector is created. */ public boolean isCreated() { return created; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index 91937caa4db..b0d716ae413 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -25,12 +25,13 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.ByteFunctionHelpers; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; import io.netty.buffer.ArrowBuf; +import io.netty.util.internal.PlatformDependent; /** * BaseFixedWidthVector provides an abstract interface for @@ -52,16 +53,14 @@ public abstract class BaseFixedWidthVector extends BaseValueVector /** * Constructs a new instance. * - * @param name The name of the vector. + * @param field field materialized by this vector * @param allocator The allocator to use for allocating memory for the vector. 
- * @param fieldType The type of the buffer. * @param typeWidth The width in bytes of the type. */ - public BaseFixedWidthVector(final String name, final BufferAllocator allocator, - FieldType fieldType, final int typeWidth) { - super(name, allocator); + public BaseFixedWidthVector(Field field, final BufferAllocator allocator, final int typeWidth) { + super(allocator); this.typeWidth = typeWidth; - field = new Field(name, fieldType, null); + this.field = field; valueCount = 0; allocationMonitor = 0; validityBuffer = allocator.getEmpty(); @@ -70,6 +69,11 @@ public BaseFixedWidthVector(final String name, final BufferAllocator allocator, } + @Override + public String getName() { + return field.getName(); + } + /* TODO: * see if getNullCount() can be made faster -- O(1) */ @@ -533,7 +537,7 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB */ @Override public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(name, allocator); + return getTransferPair(getName(), allocator); } /** @@ -802,4 +806,65 @@ protected void handleSafe(int index) { reAlloc(); } } + + /** + * Copy a cell value from a particular index in source vector to a particular + * position in this vector. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + public void copyFrom(int fromIndex, int thisIndex, BaseFixedWidthVector from) { + if (from.isNull(fromIndex)) { + BitVectorHelper.setValidityBit(this.getValidityBuffer(), thisIndex, 0); + } else { + BitVectorHelper.setValidityBit(this.getValidityBuffer(), thisIndex, 1); + PlatformDependent.copyMemory(from.getDataBuffer().memoryAddress() + fromIndex * typeWidth, + this.getDataBuffer().memoryAddress() + thisIndex * typeWidth, typeWidth); + } + } + + /** + * Same as {@link #copyFrom(int, int, BaseFixedWidthVector)} except that + * it handles the case when the capacity of the vector needs to be expanded + * before copy. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + public void copyFromSafe(int fromIndex, int thisIndex, BaseFixedWidthVector from) { + handleSafe(thisIndex); + copyFrom(fromIndex, thisIndex, from); + } + + @Override + public int hashCode(int index) { + int start = typeWidth * index; + int end = typeWidth * (index + 1); + return ByteFunctionHelpers.hash(this.getDataBuffer(), start, end); + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + if (to == null) { + return false; + } + if (this.getClass() != to.getClass()) { + return false; + } + + BaseFixedWidthVector that = (BaseFixedWidthVector) to; + + int leftStart = typeWidth * index; + int leftEnd = typeWidth * (index + 1); + + int rightStart = typeWidth * toIndex; + int rightEnd = typeWidth * (toIndex + 1); + + int ret = ByteFunctionHelpers.equal(this.getDataBuffer(), leftStart, leftEnd, + that.getDataBuffer(), rightStart, rightEnd); + return ret == 1; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java new file mode 100644 index 00000000000..57ad645c873 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for all int type vectors.
+ */
+public interface BaseIntVector extends ValueVector {
+
+  /**
+   * Sets the value at index; note the value may need to be truncated.
+   * Note this is the safe version (i.e. it calls the vector's setSafe method).
+   */
+  void setWithPossibleTruncate(int index, long value);
+
+  /**
+   * Sets the value at index; note the value may need to be truncated.
+   * Note this is the unsafe version (i.e. it calls the vector's set method).
+   */
+  void setUnsafeWithPossibleTruncate(int index, long value);
+
+  /**
+   * Gets the value at index, widened to a long. Throws {@link NullPointerException}
+   * if the value is null. Note the null check can be turned off via {@link NullCheckingForGet}.
+   */
+  long getValueAsLong(int index);
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
index bc12e8e7180..fc8e2e70d22 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -50,16 +50,16 @@ public abstract class BaseValueVector implements ValueVector {
   public static final int INITIAL_VALUE_ALLOCATION = 3970;
 
   protected final BufferAllocator allocator;
-  protected final String name;
 
-  protected BaseValueVector(String name, BufferAllocator allocator) {
+  protected BaseValueVector(BufferAllocator allocator) {
     this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null");
-    this.name = name;
   }
 
+  public abstract String getName();
+
   @Override
   public String toString() {
-    return super.toString() + "[name = " + name + ", ...]";
+    return super.toString() + "[name = " + getName() + ", ...]";
   }
 
   @Override
@@ -73,7 +73,7 @@ public void close() {
 
   @Override
   public TransferPair getTransferPair(BufferAllocator allocator) {
-    return getTransferPair(name, allocator);
+    return getTransferPair(getName(), allocator);
   }
 
   @Override
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
index 54913029531..19fcc67e174 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -27,7 +27,7 @@
 import org.apache.arrow.memory.OutOfMemoryException;
 import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
 import org.apache.arrow.vector.types.pojo.Field;
-import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.ByteFunctionHelpers;
 import org.apache.arrow.vector.util.CallBack;
 import org.apache.arrow.vector.util.OversizedAllocationException;
 import
org.apache.arrow.vector.util.TransferPair; @@ -58,17 +58,15 @@ public abstract class BaseVariableWidthVector extends BaseValueVector /** * Constructs a new instance. * - * @param name A name for the vector + * @param field The field materialized by this vector. * @param allocator The allocator to use for creating/resizing buffers - * @param fieldType The type of this vector. */ - public BaseVariableWidthVector(final String name, final BufferAllocator allocator, - FieldType fieldType) { - super(name, allocator); + public BaseVariableWidthVector(Field field, final BufferAllocator allocator) { + super(allocator); + this.field = field; lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; // -1 because we require one extra slot for the offset array. lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1; - field = new Field(name, fieldType, null); valueCount = 0; lastSet = -1; offsetBuffer = allocator.getEmpty(); @@ -76,6 +74,11 @@ public BaseVariableWidthVector(final String name, final BufferAllocator allocato valueBuffer = allocator.getEmpty(); } + @Override + public String getName() { + return field.getName(); + } + /* TODO: * see if getNullCount() can be made faster -- O(1) */ @@ -656,7 +659,7 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB */ @Override public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(name, allocator); + return getTransferPair(getName(), allocator); } /** @@ -1275,4 +1278,90 @@ public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator, return buffer; } + + /** + * Copy a cell value from a particular index in source vector to a particular + * position in this vector. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + public void copyFrom(int fromIndex, int thisIndex, BaseVariableWidthVector from) { + if (from.isNull(fromIndex)) { + fillHoles(thisIndex); + BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, 0); + final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); + offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart); + } else { + final int start = from.offsetBuffer.getInt(fromIndex * OFFSET_WIDTH); + final int end = from.offsetBuffer.getInt((fromIndex + 1) * OFFSET_WIDTH); + final int length = end - start; + fillHoles(thisIndex); + BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, 1); + final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); + from.valueBuffer.getBytes(start, this.valueBuffer, copyStart, length); + offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart + length); + } + lastSet = thisIndex; + } + + /** + * Same as {@link #copyFrom(int, int, BaseVariableWidthVector)} except that + * it handles the case when the capacity of the vector needs to be expanded + * before copy. 
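+   *
+   * <p>A minimal usage sketch, assuming {@code from} and {@code to} are vectors of
+   * the same variable-width type:</p>
+   *
+   * <pre>{@code
+   * for (int i = 0; i < from.getValueCount(); i++) {
+   *   to.copyFromSafe(i, i, from);  // reallocates 'to' as needed before copying
+   * }
+   * to.setValueCount(from.getValueCount());
+   * }</pre>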
+ * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + public void copyFromSafe(int fromIndex, int thisIndex, BaseVariableWidthVector from) { + if (from.isNull(fromIndex)) { + handleSafe(thisIndex, 0); + fillHoles(thisIndex); + BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, 0); + final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); + offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart); + } else { + final int start = from.offsetBuffer.getInt(fromIndex * OFFSET_WIDTH); + final int end = from.offsetBuffer.getInt((fromIndex + 1) * OFFSET_WIDTH); + final int length = end - start; + handleSafe(thisIndex, length); + fillHoles(thisIndex); + BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, 1); + final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); + from.valueBuffer.getBytes(start, this.valueBuffer, copyStart, length); + offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart + length); + } + lastSet = thisIndex; + } + + @Override + public int hashCode(int index) { + final int start = getStartOffset(index); + final int end = getStartOffset(index + 1); + return ByteFunctionHelpers.hash(this.getDataBuffer(), start, end); + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + if (to == null) { + return false; + } + if (this.getClass() != to.getClass()) { + return false; + } + + BaseVariableWidthVector that = (BaseVariableWidthVector) to; + + final int leftStart = getStartOffset(index); + final int leftEnd = getStartOffset(index + 1); + + final int rightStart = that.getStartOffset(toIndex); + final int rightEnd = that.getStartOffset(toIndex + 1); + + int ret = ByteFunctionHelpers.equal(this.getDataBuffer(), leftStart, leftEnd, + that.getDataBuffer(), rightStart, rightEnd); + return ret == 1; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java index 65ce53e2581..8b9404b7a28 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.BigIntHolder; import org.apache.arrow.vector.holders.NullableBigIntHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -35,7 +36,7 @@ * integer values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class BigIntVector extends BaseFixedWidthVector { +public class BigIntVector extends BaseFixedWidthVector implements BaseIntVector { public static final byte TYPE_WIDTH = 8; private final FieldReader reader; @@ -59,7 +60,18 @@ public BigIntVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public BigIntVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a BigIntVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. 
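+   *
+   * <p>(A hypothetical usage sketch of this constructor: {@code new BigIntVector(
+   *     Field.nullable("id", new ArrowType.Int(64, true)), allocator)}.)</p>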
+ */ + public BigIntVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new BigIntReaderImpl(BigIntVector.this); } @@ -133,35 +145,6 @@ public Long getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, BigIntVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final long value = from.valueBuffer.getLong(fromIndex * TYPE_WIDTH); - valueBuffer.setLong(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, BigIntVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, BigIntVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | @@ -339,6 +322,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((BigIntVector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { BigIntVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java index ebaca4e72f2..ff4504fe958 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.BitHolder; import org.apache.arrow.vector.holders.NullableBitHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; @@ -59,7 +60,18 @@ public BitVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public BitVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, 0); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a BitVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field the Field materialized by this vector + * @param allocator allocator for memory management. 
+ */ + public BitVector(Field field, BufferAllocator allocator) { + super(field, allocator,0); reader = new BitReaderImpl(BitVector.this); } @@ -284,26 +296,12 @@ public Boolean getObject(int index) { * @param thisIndex position to copy to in this vector * @param from source vector */ - public void copyFrom(int fromIndex, int thisIndex, BitVector from) { + @Override + public void copyFrom(int fromIndex, int thisIndex, BaseFixedWidthVector from) { BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - BitVectorHelper.setValidityBit(valueBuffer, thisIndex, from.getBit(fromIndex)); + BitVectorHelper.setValidityBit(valueBuffer, thisIndex, ((BitVector) from).getBit(fromIndex)); } - /** - * Same as {@link #copyFrom(int, int, BitVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, BitVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java index d1e99006505..cd16f720ecf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java @@ -17,6 +17,10 @@ package org.apache.arrow.vector; +import static io.netty.util.internal.PlatformDependent.getByte; +import static io.netty.util.internal.PlatformDependent.getInt; +import static io.netty.util.internal.PlatformDependent.getLong; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.DataSizeRoundingUtil; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -145,25 +149,108 @@ public static int getNullCount(final ArrowBuf validityBuffer, final int valueCou final int sizeInBytes = getValidityBufferSize(valueCount); // If value count is not a multiple of 8, then calculate number of used bits in the last byte final int remainder = valueCount % 8; + final int fullBytesCount = remainder == 0 ? 
sizeInBytes : sizeInBytes - 1;
+
+    int index = 0;
+    while (index + 8 <= fullBytesCount) {
+      long longValue = validityBuffer.getLong(index);
+      count += Long.bitCount(longValue);
+      index += 8;
+    }
+
+    while (index + 4 <= fullBytesCount) {
+      int intValue = validityBuffer.getInt(index);
+      count += Integer.bitCount(intValue);
+      index += 4;
+    }
 
-    final int sizeInBytesMinus1 = sizeInBytes - 1;
-    for (int i = 0; i < sizeInBytesMinus1; i++) {
-      byte byteValue = validityBuffer.getByte(i);
+    while (index < fullBytesCount) {
+      byte byteValue = validityBuffer.getByte(index);
       count += Integer.bitCount(byteValue & 0xFF);
+      index += 1;
     }
 
-    // handling with the last byte
-    byte byteValue = validityBuffer.getByte(sizeInBytes - 1);
+    // handle the remaining bits in the last byte
     if (remainder != 0) {
+      byte byteValue = validityBuffer.getByte(sizeInBytes - 1);
+
+      // set the unused high bits to 1 so they are not counted as nulls
       byte mask = (byte) (0xFF << remainder);
       byteValue = (byte) (byteValue | mask);
+      count += Integer.bitCount(byteValue & 0xFF);
     }
-    count += Integer.bitCount(byteValue & 0xFF);
 
     return 8 * sizeInBytes - count;
   }
 
+  /**
+   * Tests whether all bits in a validity buffer are equal to 0 or 1, according to
+   * the specified parameter.
+   * @param validityBuffer the validity buffer.
+   * @param valueCount the bit count.
+   * @param checkOneBits if set to true, the method checks whether all bits are equal to 1;
+   *     otherwise, it checks whether all bits are equal to 0.
+   * @return true if all bits match the specified value, and false otherwise.
+   */
+  public static boolean checkAllBitsEqualTo(
+      final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) {
+    if (valueCount == 0) {
+      return true;
+    }
+    final int sizeInBytes = getValidityBufferSize(valueCount);
+
+    // boundary check
+    validityBuffer.checkBytes(0, sizeInBytes);
+
+    // If value count is not a multiple of 8, then calculate number of used bits in the last byte
+    final int remainder = valueCount % 8;
+    final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
+
+    // the integer value to compare against (all bits 1, or all bits 0)
+    final int intToCompare = checkOneBits ? -1 : 0;
+
+    int index = 0;
+    while (index + 8 <= fullBytesCount) {
+      long longValue = getLong(validityBuffer.memoryAddress() + index);
+      if (longValue != (long) intToCompare) {
+        return false;
+      }
+      index += 8;
+    }
+
+    while (index + 4 <= fullBytesCount) {
+      int intValue = getInt(validityBuffer.memoryAddress() + index);
+      if (intValue != intToCompare) {
+        return false;
+      }
+      index += 4;
+    }
+
+    while (index < fullBytesCount) {
+      byte byteValue = getByte(validityBuffer.memoryAddress() + index);
+      if (byteValue != (byte) intToCompare) {
+        return false;
+      }
+      index += 1;
+    }
+
+    // handle the remaining bits in the last byte
+    if (remainder != 0) {
+      byte byteValue = getByte(validityBuffer.memoryAddress() + sizeInBytes - 1);
+      byte mask = (byte) ((1 << remainder) - 1);
+      byteValue = (byte) (byteValue & mask);
+      if (checkOneBits) {
+        if ((mask & byteValue) != mask) {
+          return false;
+        }
+      } else {
+        if (byteValue != (byte) 0) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /** Returns the byte at index from data right-shifted by offset.
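+   * For example, if the byte at {@code index} is {@code 0b1011_0000} and
+   * {@code offset} is 4, the result is {@code 0b0000_1011}.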
*/ public static byte getBitsFromCurrentByte(final ArrowBuf data, final int index, final int offset) { return (byte) ((data.getByte(index) & 0xFF) >>> offset); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java index 1e2b012748c..72af5def9fc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.DateDayHolder; import org.apache.arrow.vector.holders.NullableDateDayHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -59,7 +60,18 @@ public DateDayVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public DateDayVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a DateDayVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public DateDayVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new DateDayReaderImpl(DateDayVector.this); } @@ -134,35 +146,6 @@ public Integer getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, DateDayVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final int value = from.valueBuffer.getInt(fromIndex * TYPE_WIDTH); - valueBuffer.setInt(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, DateDayVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, DateDayVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java index e8ea5be11c9..be4fcbe26e9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.holders.DateMilliHolder; import org.apache.arrow.vector.holders.NullableDateMilliHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DateUtility; import org.apache.arrow.vector.util.TransferPair; @@ -62,7 +63,18 @@ public DateMilliVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public DateMilliVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a DateMilliVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public DateMilliVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new DateMilliReaderImpl(DateMilliVector.this); } @@ -138,35 +150,6 @@ public LocalDateTime getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, DateMilliVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final long value = from.valueBuffer.getLong(fromIndex * TYPE_WIDTH); - valueBuffer.setLong(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, DateMilliVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, DateMilliVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java index 9664bee58f3..cf77186929c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java @@ -28,11 +28,13 @@ import org.apache.arrow.vector.holders.NullableDecimalHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.arrow.vector.util.TransferPair; import io.netty.buffer.ArrowBuf; +import io.netty.util.internal.PlatformDependent; /** * DecimalVector implements a fixed width vector (16 bytes) of @@ -68,8 +70,19 @@ public DecimalVector(String name, BufferAllocator allocator, * @param allocator allocator for memory management. */ public DecimalVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); - ArrowType.Decimal arrowType = (ArrowType.Decimal) fieldType.getType(); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a DecimalVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public DecimalVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); + ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType(); reader = new DecimalReaderImpl(DecimalVector.this); this.precision = arrowType.getPrecision(); this.scale = arrowType.getScale(); @@ -150,34 +163,6 @@ public BigDecimal getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, DecimalVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - from.valueBuffer.getBytes(fromIndex * TYPE_WIDTH, valueBuffer, - thisIndex * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Same as {@link #copyFrom(int, int, DecimalVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, DecimalVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /** * Return scale for the decimal value. 
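+   * (For example, a vector created as {@code new DecimalVector("v", allocator, 38, 16)}
+   * returns 16.)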
*/ @@ -222,41 +207,30 @@ public void set(int index, ArrowBuf buffer) { public void setBigEndian(int index, byte[] value) { BitVectorHelper.setValidityBitToOne(validityBuffer, index); final int length = value.length; - int startIndex = index * TYPE_WIDTH; - if (length == TYPE_WIDTH) { - for (int i = TYPE_WIDTH - 1; i >= 3; i -= 4) { - valueBuffer.setByte(startIndex, value[i]); - valueBuffer.setByte(startIndex + 1, value[i - 1]); - valueBuffer.setByte(startIndex + 2, value[i - 2]); - valueBuffer.setByte(startIndex + 3, value[i - 3]); - startIndex += 4; - } - return; + // do the bound check. + valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH); + + long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH; + // swap bytes to convert BE to LE + for (int byteIdx = 0; byteIdx < length; ++byteIdx) { + PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]); } - if (length == 0) { - valueBuffer.setZero(startIndex, TYPE_WIDTH); + if (length == TYPE_WIDTH) { return; } - if (length < 16) { - for (int i = length - 1; i >= 0; i--) { - valueBuffer.setByte(startIndex, value[i]); - startIndex++; - } - + if (length == 0) { + PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte)0); + } else if (length < TYPE_WIDTH) { + // sign extend final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00); - final int maxStartIndex = (index + 1) * TYPE_WIDTH; - while (startIndex < maxStartIndex) { - valueBuffer.setByte(startIndex, pad); - startIndex++; - } - - return; + PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); + } else { + throw new IllegalArgumentException( + "Invalid decimal value length. Valid length in [1 - 16], got " + length); } - - throw new IllegalArgumentException("Invalid decimal value length. Valid length in [1 - 16], got " + length); } /** @@ -281,17 +255,19 @@ public void set(int index, int start, ArrowBuf buffer) { public void setSafe(int index, int start, ArrowBuf buffer, int length) { handleSafe(index); BitVectorHelper.setValidityBitToOne(validityBuffer, index); - int startIndexInVector = index * TYPE_WIDTH; - valueBuffer.setBytes(startIndexInVector, buffer, start, length); + + // do the bound checks. + buffer.checkBytes(start, start + length); + valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH); + + long inAddress = buffer.memoryAddress() + start; + long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH; + PlatformDependent.copyMemory(inAddress, outAddress, length); // sign extend if (length < 16) { - byte msb = buffer.getByte(start + length - 1); + byte msb = PlatformDependent.getByte(inAddress + length - 1); final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - int startIndex = startIndexInVector + length; - int endIndex = startIndexInVector + TYPE_WIDTH; - for (int i = startIndex; i < endIndex; i++) { - valueBuffer.setByte(i, pad); - } + PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); } } @@ -306,19 +282,24 @@ public void setSafe(int index, int start, ArrowBuf buffer, int length) { public void setBigEndianSafe(int index, int start, ArrowBuf buffer, int length) { handleSafe(index); BitVectorHelper.setValidityBitToOne(validityBuffer, index); - int startIndexInVector = index * TYPE_WIDTH; - for (int i = start + length - 1; i >= start; i--) { - valueBuffer.setByte(startIndexInVector, buffer.getByte(i)); - startIndexInVector++; + + // do the bound checks. 
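+    // (These explicit range checks guard the raw PlatformDependent reads and
+    // writes below, which skip ArrowBuf's per-byte bounds checking for speed.)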
+ buffer.checkBytes(start, start + length); + valueBuffer.checkBytes(index * TYPE_WIDTH, (index + 1) * TYPE_WIDTH); + + // not using buffer.getByte() to avoid boundary checks for every byte. + long inAddress = buffer.memoryAddress() + start; + long outAddress = valueBuffer.memoryAddress() + index * TYPE_WIDTH; + // swap bytes to convert BE to LE + for (int byteIdx = 0; byteIdx < length; ++byteIdx) { + byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx); + PlatformDependent.putByte(outAddress + byteIdx, val); } // sign extend if (length < 16) { - byte msb = buffer.getByte(start); + byte msb = PlatformDependent.getByte(inAddress); final byte pad = (byte) (msb < 0 ? 0xFF : 0x00); - int endIndex = startIndexInVector + TYPE_WIDTH - length; - for (int i = startIndexInVector; i < endIndex; i++) { - valueBuffer.setByte(i, pad); - } + PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java index 312c8e51309..92a9e7044b8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -54,10 +55,20 @@ public class DurationVector extends BaseFixedWidthVector { * @param allocator allocator for memory management. */ public DurationVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); - reader = new DurationReaderImpl(DurationVector.this); - this.unit = ((ArrowType.Duration)fieldType.getType()).getUnit(); + this(new Field(name, fieldType, null), allocator); + } + /** + * Instantiate a DurationVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public DurationVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); + reader = new DurationReaderImpl(DurationVector.this); + this.unit = ((ArrowType.Duration)field.getFieldType().getType()).getUnit(); } /** @@ -182,42 +193,12 @@ private StringBuilder getAsStringBuilderHelper(int index) { return new StringBuilder(getObject(index).toString()); } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, DurationVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - from.valueBuffer.getBytes(fromIndex * TYPE_WIDTH, this.valueBuffer, - thisIndex * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Same as {@link #copyFrom(int, int, DurationVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, DurationVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | | | *----------------------------------------------------------------*/ - /** * Set the element at the given index to the given value. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java index 9594d9e5814..14a66f8dafa 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java @@ -39,12 +39,35 @@ public abstract class ExtensionTypeVector<T extends ValueVector & FieldVector> extends BaseValueVector implements FieldVector { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java --- a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java public byte[] get(int index) { assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + if (isSet(index) == 0) { + return null; } final byte[] dst = new byte[byteWidth]; valueBuffer.getBytes(index * byteWidth, dst, 0, byteWidth); @@ -136,42 +146,7 @@ public void get(int index, NullableFixedSizeBinaryHolder holder) { */ @Override public byte[] getObject(int index) { - assert index >= 0; - if (isSet(index) == 0) { - return null; - } else { - final byte[] dst = new byte[byteWidth]; - valueBuffer.getBytes(index * byteWidth, dst, 0, byteWidth); - return dst; - } - } - - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, FixedSizeBinaryVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - from.valueBuffer.getBytes(fromIndex * byteWidth, valueBuffer, - thisIndex * byteWidth, byteWidth); - } - - /** - * Same as {@link #copyFrom(int, int, FixedSizeBinaryVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, FixedSizeBinaryVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); + return get(index); } public int getByteWidth() { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java index c6d6d5bb1f7..96b5625c54a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.Float4Holder; import org.apache.arrow.vector.holders.NullableFloat4Holder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -59,7 +60,18 @@ public Float4Vector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management.
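// Behavioral note on the FixedSizeBinaryVector hunk above, sketched with a
// hypothetical populated vector: get(index) on a null slot used to throw
// IllegalStateException when null checking was enabled; it now returns
// null, and getObject(index) simply delegates to get(index).
//
//   vector.setNull(0);
//   byte[] value = vector.get(0);  // now null rather than an exception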
*/ public Float4Vector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a Float4Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float4Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new Float4ReaderImpl(Float4Vector.this); } @@ -135,35 +147,6 @@ public Float getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, Float4Vector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final float value = from.valueBuffer.getFloat(fromIndex * TYPE_WIDTH); - valueBuffer.setFloat(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, Float4Vector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, Float4Vector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java index bfcb3e065ec..24128cdbb88 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.Float8Holder; import org.apache.arrow.vector.holders.NullableFloat8Holder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -59,7 +60,18 @@ public Float8Vector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public Float8Vector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a Float8Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float8Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new Float8ReaderImpl(Float8Vector.this); } @@ -135,34 +147,6 @@ public Double getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. 
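// The type-specific copyFrom/copyFromSafe overloads are removed throughout
// this patch. The diff itself does not show where the logic moves; a generic
// fixed-width equivalent could look like this sketch (placement inside
// BaseFixedWidthVector and the getTypeWidth() accessor are assumptions):
public void copyFromSafe(int fromIndex, int thisIndex, BaseFixedWidthVector from) {
  handleSafe(thisIndex);  // expand capacity first if thisIndex is out of range
  BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isNull(fromIndex) ? 0 : 1);
  final int width = getTypeWidth();
  from.getDataBuffer().getBytes(fromIndex * width, valueBuffer,
      thisIndex * width, width);
}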
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, Float8Vector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final double value = from.valueBuffer.getDouble(fromIndex * TYPE_WIDTH); - valueBuffer.setDouble(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, Float8Vector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, Float8Vector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java index 3a8207f0abc..61235e0de52 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.IntHolder; import org.apache.arrow.vector.holders.NullableIntHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -35,7 +36,7 @@ * integer values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class IntVector extends BaseFixedWidthVector { +public class IntVector extends BaseFixedWidthVector implements BaseIntVector { public static final byte TYPE_WIDTH = 4; private final FieldReader reader; @@ -59,7 +60,18 @@ public IntVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public IntVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate an IntVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public IntVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new IntReaderImpl(IntVector.this); } @@ -135,35 +147,6 @@ public Integer getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, IntVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final int value = from.valueBuffer.getInt(fromIndex * TYPE_WIDTH); - valueBuffer.setInt(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, IntVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy.
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, IntVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | @@ -343,6 +326,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((IntVector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, (int) value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, (int) value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { IntVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java index 3afe757d199..0d7125b7a0e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.holders.IntervalDayHolder; import org.apache.arrow.vector.holders.NullableIntervalDayHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -63,7 +64,18 @@ public IntervalDayVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public IntervalDayVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate an IntervalDayVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public IntervalDayVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new IntervalDayReaderImpl(IntervalDayVector.this); } @@ -211,35 +223,6 @@ private StringBuilder getAsStringBuilderHelper(int index) { .append(millis)); } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, IntervalDayVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - from.valueBuffer.getBytes(fromIndex * TYPE_WIDTH, this.valueBuffer, - thisIndex * TYPE_WIDTH, TYPE_WIDTH); - } - - /** - * Same as {@link #copyFrom(int, int, IntervalDayVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy.
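// Sketch of the BaseIntVector accessors added to IntVector above (values
// illustrative): setWithPossibleTruncate narrows the supplied long with a
// plain (int) cast, so out-of-range values wrap instead of failing:
//
//   intVector.setWithPossibleTruncate(0, 4_294_967_297L);  // 2^32 + 1
//   long readBack = intVector.getValueAsLong(0);           // yields 1L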
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, IntervalDayVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java index 6d8c00135dd..2b73d02504d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.holders.IntervalYearHolder; import org.apache.arrow.vector.holders.NullableIntervalYearHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -61,7 +62,18 @@ public IntervalYearVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public IntervalYearVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate an IntervalYearVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public IntervalYearVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new IntervalYearReaderImpl(IntervalYearVector.this); } @@ -184,35 +196,6 @@ private StringBuilder getAsStringBuilderHelper(int index) { .append(monthString)); } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, IntervalYearVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final int value = from.valueBuffer.getInt(fromIndex * TYPE_WIDTH); - valueBuffer.setInt(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, IntervalYearVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy.
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, IntervalYearVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java index dddc46fef2b..52ecd59b098 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableSmallIntHolder; import org.apache.arrow.vector.holders.SmallIntHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -35,7 +36,7 @@ * short values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class SmallIntVector extends BaseFixedWidthVector { +public class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector { public static final byte TYPE_WIDTH = 2; private final FieldReader reader; @@ -59,7 +60,18 @@ public SmallIntVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public SmallIntVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a SmallIntVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public SmallIntVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new SmallIntReaderImpl(SmallIntVector.this); } @@ -135,35 +147,6 @@ public Short getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, SmallIntVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final short value = from.valueBuffer.getShort(fromIndex * TYPE_WIDTH); - valueBuffer.setShort(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, SmallIntVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, SmallIntVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | @@ -370,6 +353,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((SmallIntVector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, (int) value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, (int) value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { SmallIntVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java index a8cb0fe5bfb..089164c0016 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeMicroHolder; import org.apache.arrow.vector.holders.TimeMicroHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -60,7 +61,18 @@ public TimeMicroVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public TimeMicroVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a TimeMicroVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeMicroVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new TimeMicroReaderImpl(TimeMicroVector.this); } @@ -135,35 +147,6 @@ public Long getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, TimeMicroVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final long value = from.valueBuffer.getLong(fromIndex * TYPE_WIDTH); - valueBuffer.setLong(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, TimeMicroVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, TimeMicroVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - - /*----------------------------------------------------------------* | | | vector value setter methods | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java index adf7562c116..9f41c84527d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.holders.NullableTimeMilliHolder; import org.apache.arrow.vector.holders.TimeMilliHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DateUtility; import org.apache.arrow.vector.util.TransferPair; @@ -62,7 +63,18 @@ public TimeMilliVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public TimeMilliVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a TimeMilliVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeMilliVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new TimeMilliReaderImpl(TimeMilliVector.this); } @@ -138,34 +150,6 @@ public LocalDateTime getObject(int index) { return DateUtility.getLocalDateTimeFromEpochMilli(millis); } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, TimeMilliVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final int value = from.valueBuffer.getInt(fromIndex * TYPE_WIDTH); - valueBuffer.setInt(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, TimeMilliVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, TimeMilliVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java index 2a288191b45..053a722430f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeNanoHolder; import org.apache.arrow.vector.holders.TimeNanoHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -59,7 +60,18 @@ public TimeNanoVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public TimeNanoVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a TimeNanoVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeNanoVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new TimeNanoReaderImpl(TimeNanoVector.this); } @@ -135,34 +147,6 @@ public Long getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, TimeNanoVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final long value = from.valueBuffer.getLong(fromIndex * TYPE_WIDTH); - valueBuffer.setLong(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, TimeNanoVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, TimeNanoVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java index 5198a26ca23..15992af79d6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeSecHolder; import org.apache.arrow.vector.holders.TimeSecHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -59,7 +60,18 @@ public TimeSecVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public TimeSecVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a TimeSecVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeSecVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new TimeSecReaderImpl(TimeSecVector.this); } @@ -135,34 +147,6 @@ public Integer getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, TimeSecVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final int value = from.valueBuffer.getInt(fromIndex * TYPE_WIDTH); - valueBuffer.setInt(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, TimeSecVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. 
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, TimeSecVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java index 7027f4f1aca..5b4bc2a234a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -63,6 +64,20 @@ public TimeStampMicroTZVector(String name, FieldType fieldType, BufferAllocator reader = new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this); } + /** + * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampMicroTZVector(Field field, BufferAllocator allocator) { + super(field, allocator); + ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); + timeZone = arrowType.getTimezone(); + reader = new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this); + } + /** * Get a reader that supports reading values from this vector. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java index 2a427de1445..7a5f8d8d19c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder; import org.apache.arrow.vector.holders.TimeStampMicroHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DateUtility; import org.apache.arrow.vector.util.TransferPair; @@ -61,6 +62,18 @@ public TimeStampMicroVector(String name, FieldType fieldType, BufferAllocator al reader = new TimeStampMicroReaderImpl(TimeStampMicroVector.this); } + /** + * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampMicroVector(Field field, BufferAllocator allocator) { + super(field, allocator); + reader = new TimeStampMicroReaderImpl(TimeStampMicroVector.this); + } + /** * Get a reader that supports reading values from this vector. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java index 7cb83bed7ea..358aa96b120 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -63,6 +64,20 @@ public TimeStampMilliTZVector(String name, FieldType fieldType, BufferAllocator reader = new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this); } + /** + * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampMilliTZVector(Field field, BufferAllocator allocator) { + super(field, allocator); + ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); + timeZone = arrowType.getTimezone(); + reader = new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this); + } + /** * Get a reader that supports reading values from this vector. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java index b05749e8951..d05250cdcec 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder; import org.apache.arrow.vector.holders.TimeStampMilliHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DateUtility; import org.apache.arrow.vector.util.TransferPair; @@ -61,6 +62,18 @@ public TimeStampMilliVector(String name, FieldType fieldType, BufferAllocator al reader = new TimeStampMilliReaderImpl(TimeStampMilliVector.this); } + /** + * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampMilliVector(Field field, BufferAllocator allocator) { + super(field, allocator); + reader = new TimeStampMilliReaderImpl(TimeStampMilliVector.this); + } + /** * Get a reader that supports reading values from this vector. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java index eacc891358b..a668a6ca6ba 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -63,6 +64,20 @@ public TimeStampNanoTZVector(String name, FieldType fieldType, BufferAllocator a reader = new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this); } + /** + * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampNanoTZVector(Field field, BufferAllocator allocator) { + super(field, allocator); + ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); + timeZone = arrowType.getTimezone(); + reader = new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this); + } + /** * Get a reader that supports reading values from this vector. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java index ccc17de5c93..70f4db6a97e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder; import org.apache.arrow.vector.holders.TimeStampNanoHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DateUtility; import org.apache.arrow.vector.util.TransferPair; @@ -61,6 +62,18 @@ public TimeStampNanoVector(String name, FieldType fieldType, BufferAllocator all reader = new TimeStampNanoReaderImpl(TimeStampNanoVector.this); } + /** + * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampNanoVector(Field field, BufferAllocator allocator) { + super(field, allocator); + reader = new TimeStampNanoReaderImpl(TimeStampNanoVector.this); + } + /** * Get a reader that supports reading values from this vector. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java index 3f24c18de03..fe6fe795545 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -63,6 +64,20 @@ public TimeStampSecTZVector(String name, FieldType fieldType, BufferAllocator al reader = new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this); } + /** + * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampSecTZVector(Field field, BufferAllocator allocator) { + super(field, allocator); + ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType(); + timeZone = arrowType.getTimezone(); + reader = new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this); + } + /** * Get a reader that supports reading values from this vector. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java index 2293c10ab92..686c4a44388 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTimeStampSecHolder; import org.apache.arrow.vector.holders.TimeStampSecHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DateUtility; import org.apache.arrow.vector.util.TransferPair; @@ -61,6 +62,18 @@ public TimeStampSecVector(String name, FieldType fieldType, BufferAllocator allo reader = new TimeStampSecReaderImpl(TimeStampSecVector.this); } + /** + * Instantiate a TimeStampSecVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field Field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampSecVector(Field field, BufferAllocator allocator) { + super(field, allocator); + reader = new TimeStampSecReaderImpl(TimeStampSecVector.this); + } + /** * Get a reader that supports reading values from this vector. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java index d4aa9d8c68a..53bcbc0aacf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java @@ -20,6 +20,7 @@ import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -42,7 +43,18 @@ public abstract class TimeStampVector extends BaseFixedWidthVector { * @param allocator allocator for memory management. */ public TimeStampVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a TimeStampVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public TimeStampVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); } @@ -65,34 +77,6 @@ public long get(int index) throws IllegalStateException { return valueBuffer.getLong(index * TYPE_WIDTH); } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, TimeStampVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final long value = from.valueBuffer.getLong(fromIndex * TYPE_WIDTH); - valueBuffer.setLong(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFromSafe(int, int, TimeStampVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, TimeStampVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java b/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java index df40b6e57cc..ebac2fcec3d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableTinyIntHolder; import org.apache.arrow.vector.holders.TinyIntHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -35,7 +36,7 @@ * byte values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. 
*/ -public class TinyIntVector extends BaseFixedWidthVector { +public class TinyIntVector extends BaseFixedWidthVector implements BaseIntVector { public static final byte TYPE_WIDTH = 1; private final FieldReader reader; @@ -59,7 +60,18 @@ public TinyIntVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public TinyIntVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a TinyIntVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public TinyIntVector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new TinyIntReaderImpl(TinyIntVector.this); } @@ -135,34 +147,6 @@ public Byte getObject(int index) { } } - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, TinyIntVector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final byte value = from.valueBuffer.getByte(fromIndex * TYPE_WIDTH); - valueBuffer.setByte(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, TinyIntVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, TinyIntVector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | @@ -370,6 +354,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((TinyIntVector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, (int) value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, (int) value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { TinyIntVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java index c5133344fe8..df3759e7c2d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java @@ -25,17 +25,19 @@ import org.apache.arrow.vector.holders.NullableUInt1Holder; import org.apache.arrow.vector.holders.UInt1Holder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; import io.netty.buffer.ArrowBuf; + /** * UInt1Vector implements a fixed width (1 byte) vector of * integer values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null.
*/ -public class UInt1Vector extends BaseFixedWidthVector { +public class UInt1Vector extends BaseFixedWidthVector implements BaseIntVector { private static final byte TYPE_WIDTH = 1; private final FieldReader reader; @@ -44,7 +46,11 @@ public UInt1Vector(String name, BufferAllocator allocator) { } public UInt1Vector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + public UInt1Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new UInt1ReaderImpl(UInt1Vector.this); } @@ -140,24 +146,6 @@ public Short getObjectNoOverflow(int index) { } } - /** - * Copies the value at fromIndex to thisIndex (including validity). - */ - public void copyFrom(int fromIndex, int thisIndex, UInt1Vector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final byte value = from.valueBuffer.getByte(fromIndex * TYPE_WIDTH); - valueBuffer.setByte(thisIndex * TYPE_WIDTH, value); - } - - /** - * Identical to {@link #copyFrom()} but reallocates buffer if index is larger - * than capacity. - */ - public void copyFromSafe(int fromIndex, int thisIndex, UInt1Vector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | @@ -329,6 +317,23 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((UInt1Vector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, (int) value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, (int) value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + + + private class TransferImpl implements TransferPair { UInt1Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java index 631050d57a2..e39e5e2646e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableUInt2Holder; import org.apache.arrow.vector.holders.UInt2Holder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -35,7 +36,7 @@ * integer values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class UInt2Vector extends BaseFixedWidthVector { +public class UInt2Vector extends BaseFixedWidthVector implements BaseIntVector { private static final byte TYPE_WIDTH = 2; private final FieldReader reader; @@ -44,7 +45,11 @@ public UInt2Vector(String name, BufferAllocator allocator) { } public UInt2Vector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + public UInt2Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new UInt2ReaderImpl(UInt2Vector.this); } @@ -121,22 +126,6 @@ public Character getObject(int index) { } } - /** Copies a value and validity bit from the given vector to this one. 
*/ - public void copyFrom(int fromIndex, int thisIndex, UInt2Vector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final char value = from.valueBuffer.getChar(fromIndex * TYPE_WIDTH); - valueBuffer.setChar(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, UInt2Vector)} but reallocate buffer if - * index is larger than capacity. - */ - public void copyFromSafe(int fromIndex, int thisIndex, UInt2Vector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | @@ -308,6 +297,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((UInt2Vector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, (int) value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, (int) value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { UInt2Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java index 84e6b8f3788..8657f067bd6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.holders.NullableUInt4Holder; import org.apache.arrow.vector.holders.UInt4Holder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -35,7 +36,7 @@ * integer values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class UInt4Vector extends BaseFixedWidthVector { +public class UInt4Vector extends BaseFixedWidthVector implements BaseIntVector { private static final byte TYPE_WIDTH = 4; private final FieldReader reader; @@ -44,7 +45,11 @@ public UInt4Vector(String name, BufferAllocator allocator) { } public UInt4Vector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + public UInt4Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new UInt4ReaderImpl(UInt4Vector.this); } @@ -139,25 +144,6 @@ public Long getObjectNoOverflow(int index) { } } - /** - * Copies a value and validity setting to the thisIndex position from the given vector - * at fromIndex. - */ - public void copyFrom(int fromIndex, int thisIndex, UInt4Vector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final int value = from.valueBuffer.getInt(fromIndex * TYPE_WIDTH); - valueBuffer.setInt(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, UInt4Vector)} but will allocate additional space - * if fromIndex is larger than current capacity. 
- */ - public void copyFromSafe(int fromIndex, int thisIndex, UInt4Vector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | @@ -301,6 +287,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((UInt4Vector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, (int) value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, (int) value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { UInt4Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java index 0f8da381ee5..62a5ba5e4ab 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.holders.NullableUInt8Holder; import org.apache.arrow.vector.holders.UInt8Holder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -37,7 +38,7 @@ * integer values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class UInt8Vector extends BaseFixedWidthVector { +public class UInt8Vector extends BaseFixedWidthVector implements BaseIntVector { private static final byte TYPE_WIDTH = 8; private final FieldReader reader; @@ -46,7 +47,11 @@ public UInt8Vector(String name, BufferAllocator allocator) { } public UInt8Vector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType, TYPE_WIDTH); + this(new Field(name, fieldType, null), allocator); + } + + public UInt8Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); reader = new UInt8ReaderImpl(UInt8Vector.this); } @@ -144,25 +149,6 @@ public BigInteger getObjectNoOverflow(int index) { } } - /** - * Copy a value and validity setting from fromIndex in from to this - * Vector at thisIndex. - */ - public void copyFrom(int fromIndex, int thisIndex, UInt8Vector from) { - BitVectorHelper.setValidityBit(validityBuffer, thisIndex, from.isSet(fromIndex)); - final long value = from.valueBuffer.getLong(fromIndex * TYPE_WIDTH); - valueBuffer.setLong(thisIndex * TYPE_WIDTH, value); - } - - /** - * Same as {@link #copyFrom(int, int, UInt8Vector)} but reallocates if thisIndex is - * larger then current capacity. 
- */ - public void copyFromSafe(int fromIndex, int thisIndex, UInt8Vector from) { - handleSafe(thisIndex); - copyFrom(fromIndex, thisIndex, from); - } - /*----------------------------------------------------------------* | | @@ -302,6 +288,21 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((UInt8Vector) to); } + @Override + public void setWithPossibleTruncate(int index, long value) { + this.setSafe(index, value); + } + + @Override + public void setUnsafeWithPossibleTruncate(int index, long value) { + this.set(index, value); + } + + @Override + public long getValueAsLong(int index) { + return this.get(index); + } + private class TransferImpl implements TransferPair { UInt8Vector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 86a381a0aec..795493a4127 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -237,4 +237,18 @@ public interface ValueVector extends Closeable, Iterable { * @return true if element is null */ boolean isNull(int index); + + /** + * Returns hashCode of element in index. + */ + int hashCode(int index); + + /** + * Check whether the element in index equals to the element in targetIndex from the target vector. + * @param index index to compare in this vector + * @param target target vector + * @param targetIndex index to compare in target vector + * @return true if equals, otherwise false. + */ + boolean equals(int index, ValueVector target, int targetIndex); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index 093ffac9099..14c6c9a4fef 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -17,14 +17,13 @@ package org.apache.arrow.vector; -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.holders.NullableVarBinaryHolder; import org.apache.arrow.vector.holders.VarBinaryHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; @@ -56,7 +55,18 @@ public VarBinaryVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public VarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a VarBinaryVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. 
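With `hashCode(int)` and `equals(int, ValueVector, int)` added to the `ValueVector` interface here, single elements can be compared across vectors without boxing through `getObject`. A hedged sketch (class name illustrative; it assumes the scalar vectors pick up implementations elsewhere in this change, as the complex vectors do below):

```java
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;

public class ElementEqualsSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         IntVector left = new IntVector("left", allocator);
         IntVector right = new IntVector("right", allocator)) {
      left.allocateNew(2);
      left.setSafe(0, 42);
      left.setNull(1);
      left.setValueCount(2);

      right.allocateNew(1);
      right.setSafe(0, 42);
      right.setValueCount(1);

      // Element-level comparison, no boxing through getObject().
      System.out.println(left.equals(0, right, 0)); // true: both are 42
      System.out.println(left.equals(1, right, 0)); // false: null vs 42
      System.out.println(left.hashCode(0) == right.hashCode(0)); // true
    }
  }
}
```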
+ */ + public VarBinaryVector(Field field, BufferAllocator allocator) { + super(field, allocator); reader = new VarBinaryReaderImpl(VarBinaryVector.this); } @@ -97,8 +107,8 @@ public MinorType getMinorType() { */ public byte[] get(int index) { assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + if (isSet(index) == 0) { + return null; } final int startOffset = getStartOffset(index); final int dataLength = @@ -115,13 +125,7 @@ public byte[] get(int index) { * @return byte array for non-null element, null otherwise */ public byte[] getObject(int index) { - byte[] b; - try { - b = get(index); - } catch (IllegalStateException e) { - return null; - } - return b; + return get(index); } /** @@ -151,48 +155,6 @@ public void get(int index, NullableVarBinaryHolder holder) { *----------------------------------------------------------------*/ - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, VarBinaryVector from) { - final int start = from.offsetBuffer.getInt(fromIndex * OFFSET_WIDTH); - final int end = from.offsetBuffer.getInt((fromIndex + 1) * OFFSET_WIDTH); - final int length = end - start; - fillHoles(thisIndex); - BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, from.isSet(fromIndex)); - final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); - from.valueBuffer.getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - lastSet = thisIndex; - } - - /** - * Same as {@link #copyFrom(int, int, VarBinaryVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, VarBinaryVector from) { - final int start = from.offsetBuffer.getInt(fromIndex * OFFSET_WIDTH); - final int end = from.offsetBuffer.getInt((fromIndex + 1) * OFFSET_WIDTH); - final int length = end - start; - handleSafe(thisIndex, length); - fillHoles(thisIndex); - BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, from.isSet(fromIndex)); - final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); - from.valueBuffer.getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - lastSet = thisIndex; - } - /** * Set the variable length element at the specified index to the data * buffer supplied in the holder. 
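`VarBinaryVector.get` (and `VarCharVector.get` below) now returns `null` for a null slot instead of throwing `IllegalStateException` under `NULL_CHECKING_ENABLED`; callers that relied on the exception must null-check instead. A small sketch of the new behavior (class name illustrative):

```java
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarBinaryVector;

public class NullGetSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         VarBinaryVector vector = new VarBinaryVector("v", allocator)) {
      vector.allocateNew();
      vector.setSafe(0, new byte[] {1, 2, 3});
      vector.setValueCount(2); // slot 1 was never set, so it is null

      // Previously this threw IllegalStateException when null checking was on;
      // now get() and getObject() agree and both return null.
      System.out.println(vector.get(1) == null); // true
    }
  }
}
```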
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index 5b2623c500b..1f21a576d7a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -17,14 +17,13 @@ package org.apache.arrow.vector; -import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.VarCharReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.holders.NullableVarCharHolder; import org.apache.arrow.vector.holders.VarCharHolder; import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; @@ -55,7 +54,18 @@ public VarCharVector(String name, BufferAllocator allocator) { * @param allocator allocator for memory management. */ public VarCharVector(String name, FieldType fieldType, BufferAllocator allocator) { - super(name, allocator, fieldType); + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a VarCharVector. This doesn't allocate any memory for + * the data in vector. + * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public VarCharVector(Field field, BufferAllocator allocator) { + super(field, allocator); reader = new VarCharReaderImpl(VarCharVector.this); } @@ -94,8 +104,8 @@ public MinorType getMinorType() { */ public byte[] get(int index) { assert index >= 0; - if (NULL_CHECKING_ENABLED && isSet(index) == 0) { - throw new IllegalStateException("Value at index is null"); + if (isSet(index) == 0) { + return null; } final int startOffset = getStartOffset(index); final int dataLength = @@ -112,15 +122,12 @@ public byte[] get(int index) { * @return Text object for non-null element, null otherwise */ public Text getObject(int index) { - Text result = new Text(); - byte[] b; - try { - b = get(index); - } catch (IllegalStateException e) { + byte[] b = get(index); + if (b == null) { return null; + } else { + return new Text(b); } - result.set(b); - return result; } /** @@ -150,48 +157,6 @@ public void get(int index, NullableVarCharHolder holder) { *----------------------------------------------------------------*/ - /** - * Copy a cell value from a particular index in source vector to a particular - * position in this vector. 
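The `(String, FieldType)` constructors throughout this patch now delegate to new `(Field, BufferAllocator)` constructors, so a vector keeps the exact `Field` it was created from, metadata included. A sketch, with the metadata key made up for illustration:

```java
import java.util.Collections;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

public class FieldConstructorSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
      FieldType type = new FieldType(true, ArrowType.Utf8.INSTANCE, null,
          Collections.singletonMap("origin", "example")); // hypothetical metadata
      Field field = new Field("name", type, null);
      try (VarCharVector vector = new VarCharVector(field, allocator)) {
        // The vector keeps the Field it was built from, metadata included.
        System.out.println(vector.getField().getMetadata()); // {origin=example}
      }
    }
  }
}
```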
- * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFrom(int fromIndex, int thisIndex, VarCharVector from) { - final int start = from.offsetBuffer.getInt(fromIndex * OFFSET_WIDTH); - final int end = from.offsetBuffer.getInt((fromIndex + 1) * OFFSET_WIDTH); - final int length = end - start; - fillHoles(thisIndex); - BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, from.isSet(fromIndex)); - final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); - from.valueBuffer.getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - lastSet = thisIndex; - } - - /** - * Same as {@link #copyFrom(int, int, VarCharVector)} except that - * it handles the case when the capacity of the vector needs to be expanded - * before copy. - * - * @param fromIndex position to copy from in source vector - * @param thisIndex position to copy to in this vector - * @param from source vector - */ - public void copyFromSafe(int fromIndex, int thisIndex, VarCharVector from) { - final int start = from.offsetBuffer.getInt(fromIndex * OFFSET_WIDTH); - final int end = from.offsetBuffer.getInt((fromIndex + 1) * OFFSET_WIDTH); - final int length = end - start; - handleSafe(thisIndex, length); - fillHoles(thisIndex); - BitVectorHelper.setValidityBit(this.validityBuffer, thisIndex, from.isSet(fromIndex)); - final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH); - from.valueBuffer.getBytes(start, this.valueBuffer, copyStart, length); - offsetBuffer.setInt((thisIndex + 1) * OFFSET_WIDTH, copyStart + length); - lastSet = thisIndex; - } - /** * Set the variable length element at the specified index to the data * buffer supplied in the holder. @@ -300,7 +265,6 @@ public void setSafe(int index, Text text) { setSafe(index, text.getBytes(), 0, text.getLength()); } - /*----------------------------------------------------------------* | | | vector transfer | diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index 373e03c9023..a3fab14c3de 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -35,7 +35,7 @@ */ public class VectorSchemaRoot implements AutoCloseable { - private final Schema schema; + private Schema schema; private int rowCount; private final List fieldVectors; private final Map fieldVectorsMap = new HashMap<>(); @@ -206,4 +206,24 @@ public String contentToTSVString() { } return sb.toString(); } + + /** + * Synchronizes the schema from the current vectors. + * In some cases, the schema and the actual vector structure may be different. + * This can be caused by a promoted writer (for details, see + * {@link org.apache.arrow.vector.complex.impl.PromotableWriter}). + * For example, writing different types of data to a {@link org.apache.arrow.vector.complex.ListVector} + * may lead to such a case. + * When this happens, this method should be called to bring the schema and the vector structure back into a synchronized state. + * @return true if the schema is updated, false otherwise.
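`VectorSchemaRoot.syncSchema` exists because a promotable writer can change a vector's `Field` underneath a root whose `Schema` was captured at construction. A sketch of the intended flow, assuming the usual `UnionListWriter` promotion behavior (class name illustrative):

```java
import java.util.Collections;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

public class SyncSchemaSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         ListVector vector = ListVector.empty("l", allocator)) {
      VectorSchemaRoot root = new VectorSchemaRoot(
          Collections.singletonList(vector.getField()),
          Collections.<FieldVector>singletonList(vector), 0);

      // Mixing element types promotes the list's child to a union, so the
      // vector's Field no longer matches the Schema captured above.
      UnionListWriter writer = vector.getWriter();
      writer.startList();
      writer.bigInt().writeBigInt(1L);
      writer.float8().writeFloat8(2.5);
      writer.endList();
      vector.setValueCount(1);

      System.out.println(root.syncSchema()); // true: schema was stale, now rebuilt
      System.out.println(root.syncSchema()); // false: already in sync
    }
  }
}
```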
+ */ + public boolean syncSchema() { + List oldFields = this.schema.getFields(); + List newFields = this.fieldVectors.stream().map(ValueVector::getField).collect(Collectors.toList()); + if (!oldFields.equals(newFields)) { + this.schema = new Schema(newFields); + return true; + } + return false; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java index 37784ede1d1..b34aa4d6f15 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java @@ -244,4 +244,14 @@ public int getNullCount() { public boolean isNull(int index) { return false; } + + @Override + public int hashCode(int index) { + return 0; + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + return false; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index bbd3ff28ebd..ba837a24c1f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.pojo.FieldType; @@ -139,12 +140,7 @@ public T addOrGet(String childName, FieldType fieldType, } private boolean nullFilled(ValueVector vector) { - for (int r = 0; r < vector.getValueCount(); r++) { - if (!vector.isNull(r)) { - return false; - } - } - return true; + return BitVectorHelper.checkAllBitsEqualTo(vector.getValidityBuffer(), vector.getValueCount(), false); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index a370cb8a2d5..dbc7236bb1b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -55,19 +55,28 @@ public abstract class BaseRepeatedValueVector extends BaseValueVector implements protected final CallBack callBack; protected int valueCount; protected int offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + private final String name; + + protected String defaultDataVectorName = DATA_VECTOR_NAME; protected BaseRepeatedValueVector(String name, BufferAllocator allocator, CallBack callBack) { this(name, allocator, DEFAULT_DATA_VECTOR, callBack); } protected BaseRepeatedValueVector(String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { - super(name, allocator); + super(allocator); + this.name = name; this.offsetBuffer = allocator.getEmpty(); this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); this.callBack = callBack; this.valueCount = 0; } + @Override + public String getName() { + return name; + } + @Override public boolean allocateNewSafe() { boolean dataAlloc = false; @@ -270,7 +279,7 @@ public int size() { public AddOrGetResult addOrGetVector(FieldType fieldType) { boolean created = false; if (vector instanceof ZeroVector) { - vector = 
fieldType.createNewSingleVector(DATA_VECTOR_NAME, allocator, callBack); + vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack); // returned vector must have the same field created = true; if (callBack != null && diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 35bede68fe9..c665bef8710 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -45,6 +45,7 @@ import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.ByteFunctionHelpers; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.OversizedAllocationException; @@ -65,6 +66,7 @@ public static FixedSizeListVector empty(String name, int size, BufferAllocator a private ArrowBuf validityBuffer; private final int listSize; private final FieldType fieldType; + private final String name; private UnionFixedSizeListReader reader; private int valueCount; @@ -94,14 +96,14 @@ public FixedSizeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack unusedSchemaChangeCallback) { - super(name, allocator); + super(allocator); + this.name = name; this.validityBuffer = allocator.getEmpty(); this.vector = ZeroVector.INSTANCE; this.fieldType = fieldType; this.listSize = ((ArrowType.FixedSizeList) fieldType.getType()).getListSize(); Preconditions.checkArgument(listSize > 0, "list size must be positive"); - this.reader = new UnionFixedSizeListReader(this); this.valueCount = 0; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); } @@ -117,6 +119,11 @@ public MinorType getMinorType() { return MinorType.FIXED_SIZE_LIST; } + @Override + public String getName() { + return name; + } + /** Get the fixed size for each list. 
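The `AbstractStructVector.nullFilled` rewrite above swaps a per-row `isNull` loop for `BitVectorHelper.checkAllBitsEqualTo`, which scans the validity buffer directly. The same helper can be used standalone; a sketch with an illustrative class name:

```java
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.IntVector;

public class AllNullCheckSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         IntVector vector = new IntVector("v", allocator)) {
      vector.allocateNew(3);
      vector.setValueCount(3); // nothing was set: all three slots are null

      // One pass over the validity buffer instead of a per-row isNull() loop.
      System.out.println(BitVectorHelper.checkAllBitsEqualTo(
          vector.getValidityBuffer(), vector.getValueCount(), false)); // true

      vector.setSafe(1, 7);
      System.out.println(BitVectorHelper.checkAllBitsEqualTo(
          vector.getValidityBuffer(), vector.getValueCount(), false)); // false
    }
  }
}
```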
*/ public int getListSize() { return listSize; @@ -177,9 +184,16 @@ public List getFieldInnerVectors() { @Override public UnionFixedSizeListReader getReader() { + if (reader == null) { + reader = new UnionFixedSizeListReader(this); + } return reader; } + private void invalidateReader() { + reader = null; + } + @Override public void allocateNew() throws OutOfMemoryException { if (!allocateNewSafe()) { @@ -339,7 +353,7 @@ public AddOrGetResult addOrGetVector(FieldType type) boolean created = false; if (vector == ZeroVector.INSTANCE) { vector = type.createNewSingleVector(DATA_VECTOR_NAME, allocator, null); - this.reader = new UnionFixedSizeListReader(this); + invalidateReader(); created = true; } // returned vector must have the same field @@ -366,7 +380,7 @@ public UnionVector promoteToUnion() { UnionVector vector = new UnionVector(name, allocator, null); this.vector.clear(); this.vector = vector; - this.reader = new UnionFixedSizeListReader(this); + invalidateReader(); return vector; } @@ -489,6 +503,37 @@ public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((FixedSizeListVector) target); } + @Override + public int hashCode(int index) { + if (isSet(index) == 0) { + return 0; + } + int hash = 0; + for (int i = 0; i < listSize; i++) { + hash = ByteFunctionHelpers.comebineHash(hash, vector.hashCode(index * listSize + i)); + } + return hash; + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + if (to == null) { + return false; + } + if (this.getClass() != to.getClass()) { + return false; + } + + FixedSizeListVector that = (FixedSizeListVector) to; + + for (int i = 0; i < listSize; i++) { + if (!vector.equals(index * listSize + i, that, toIndex * listSize + i)) { + return false; + } + } + return true; + } + private class TransferImpl implements TransferPair { FixedSizeListVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index bfbc5c7f563..43b43bdbe1a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -72,6 +72,10 @@ public static ListVector empty(String name, BufferAllocator allocator) { private CallBack callBack; private final FieldType fieldType; private int validityAllocationSizeInBytes; + + /** + * The maximum index that is actually set. 
+ */ private int lastSet; /** @@ -101,11 +105,10 @@ public ListVector(String name, BufferAllocator allocator, DictionaryEncoding dic public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); this.validityBuffer = allocator.getEmpty(); - this.reader = new UnionListReader(this); this.fieldType = checkNotNull(fieldType); this.callBack = callBack; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.lastSet = 0; + this.lastSet = -1; } @Override @@ -200,7 +203,7 @@ public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers validityAllocationSizeInBytes = validityBuffer.capacity(); offsetAllocationSizeInBytes = offsetBuffer.capacity(); - lastSet = fieldNode.getLength(); + lastSet = fieldNode.getLength() - 1; valueCount = fieldNode.getLength(); } @@ -408,6 +411,49 @@ public ArrowBuf getOffsetBuffer() { return offsetBuffer; } + @Override + public int hashCode(int index) { + if (isSet(index) == 0) { + return 0; + } + int hash = 0; + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); + for (int i = start; i < end; i++) { + hash = 31 * hash + vector.hashCode(i); + } + return hash; + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + if (to == null) { + return false; + } + if (this.getClass() != to.getClass()) { + return false; + } + + ListVector that = (ListVector) to; + final int leftStart = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int leftEnd = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH); + + final int rightStart = that.offsetBuffer.getInt(toIndex * OFFSET_WIDTH); + final int rightEnd = that.offsetBuffer.getInt((toIndex + 1) * OFFSET_WIDTH); + + if ((leftEnd - leftStart) != (rightEnd - rightStart)) { + return false; + } + + for (int i = 0; i < (leftEnd - leftStart); i++) { + if (!vector.equals(leftStart + i, that.vector, rightStart + i)) { + return false; + } + } + + return true; + } + private class TransferImpl implements TransferPair { ListVector to; @@ -465,7 +511,7 @@ public void splitAndTransfer(int startIndex, int length) { splitAndTransferValidityBuffer(startIndex, length, to); /* splitAndTransfer data buffer */ dataTransferPair.splitAndTransfer(startPoint, sliceLength); - to.lastSet = length; + to.lastSet = length - 1; to.setValueCount(length); } @@ -541,13 +587,16 @@ public void copyValueSafe(int from, int to) { @Override public UnionListReader getReader() { + if (reader == null) { + reader = new UnionListReader(this); + } return reader; } /** Initialize the child data vector to field type.
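The `lastSet` bookkeeping in `ListVector` changes meaning in this hunk: it used to be the count of populated positions and is now the largest index actually written (`-1` when empty), which removes the scattered `+ 1`/`- 1` adjustments elsewhere. The hole-filling behavior it drives is easiest to see from the writer side; a sketch (class name illustrative):

```java
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

public class LastSetSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         ListVector vector = ListVector.empty("l", allocator)) {
      UnionListWriter writer = vector.getWriter();

      // Write only row 2; rows 0 and 1 are holes.
      writer.setPosition(2);
      writer.startList();
      writer.bigInt().writeBigInt(42L);
      writer.endList();

      // setValueCount() fills offsets for untouched rows starting at
      // lastSet + 1, so the holes become null rows with empty extents.
      vector.setValueCount(3);
      System.out.println(vector.getObject(0)); // null
      System.out.println(vector.getObject(2)); // [42]
    }
  }
}
```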
*/ public AddOrGetResult addOrGetVector(FieldType fieldType) { AddOrGetResult result = super.addOrGetVector(fieldType); - reader = new UnionListReader(this); + invalidateReader(); return result; } @@ -568,7 +617,7 @@ public int getBufferSize() { @Override public Field getField() { - return new Field(name, fieldType, Collections.singletonList(getDataVector().getField())); + return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField())); } @Override @@ -580,14 +629,14 @@ public MinorType getMinorType() { public void clear() { super.clear(); validityBuffer = releaseBuffer(validityBuffer); - lastSet = 0; + lastSet = -1; } @Override public void reset() { super.reset(); validityBuffer.setZero(0, validityBuffer.capacity()); - lastSet = 0; + lastSet = -1; } /** @@ -627,13 +676,17 @@ public ArrowBuf[] getBuffers(boolean clear) { public UnionVector promoteToUnion() { UnionVector vector = new UnionVector("$data$", allocator, callBack); replaceDataVector(vector); - reader = new UnionListReader(this); + invalidateReader(); if (callBack != null) { callBack.doWork(); } return vector; } + protected void invalidateReader() { + reader = null; + } + /** * Get the element in the list vector at a particular index. * @param index position of the element @@ -716,7 +769,7 @@ public void setNotNull(int index) { reallocValidityAndOffsetBuffers(); } BitVectorHelper.setValidityBitToOne(validityBuffer, index); - lastSet = index + 1; + lastSet = index; } /** @@ -729,13 +782,13 @@ public int startNewValue(int index) { while (index >= getValidityAndOffsetValueCapacity()) { reallocValidityAndOffsetBuffers(); } - for (int i = lastSet; i <= index; i++) { + for (int i = lastSet + 1; i <= index; i++) { final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); } BitVectorHelper.setValidityBitToOne(validityBuffer, index); - lastSet = index + 1; - return offsetBuffer.getInt(lastSet * OFFSET_WIDTH); + lastSet = index; + return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); } /** @@ -762,7 +815,7 @@ public void setValueCount(int valueCount) { /* check if validity and offset buffers need to be re-allocated */ reallocValidityAndOffsetBuffers(); } - for (int i = lastSet; i < valueCount; i++) { + for (int i = lastSet + 1; i < valueCount; i++) { /* fill the holes with offsets */ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset); @@ -770,7 +823,7 @@ public void setValueCount(int valueCount) { } /* valueCount for the data vector is the current end offset */ final int childValueCount = (valueCount == 0) ? 0 : - offsetBuffer.getInt(lastSet * OFFSET_WIDTH); + offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH); /* set the value count of data vector and this will take care of * checking whether data buffer needs to be reallocated. 
*/ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index a3671501ba7..994a7d0d7d1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -24,18 +24,14 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.FieldVector; -import org.apache.arrow.vector.ValueVector; -import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; -import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.SchemaChangeRuntimeException; /** * A MapVector is used to store entries of key/value pairs. It is a container vector that is @@ -48,9 +44,7 @@ public class MapVector extends ListVector { public static final String KEY_NAME = "key"; public static final String VALUE_NAME = "value"; - - // TODO: this is only used for addOrGetVector because ListVector declares it private - protected CallBack callBack; + public static final String DATA_VECTOR_NAME = "entries"; /** * Construct an empty MapVector with no data. Child vectors must be added subsequently. @@ -74,8 +68,7 @@ public static MapVector empty(String name, BufferAllocator allocator, boolean ke */ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, fieldType, callBack); - this.callBack = callBack; - reader = new UnionMapReader(this); + defaultDataVectorName = DATA_VECTOR_NAME; } /** @@ -116,52 +109,9 @@ public UnionMapWriter getWriter() { */ @Override public UnionMapReader getReader() { - return (UnionMapReader)reader; - } - - /** - * Add a child vector that will be the list vector, or get the vector if already added. - * - * @param fieldType The field type of the child vector. - * @param Type of the resulting vector. - * @return return an AddOrGetResult instance that contains the vector and created flag. - */ - @Override - public AddOrGetResult addOrGetVector(FieldType fieldType) { - - // TODO: can call super method once DATA_VECTOR_NAME is configurable - boolean created = false; - if (vector instanceof ZeroVector) { - vector = fieldType.createNewSingleVector("entries", allocator, callBack); - // returned vector must have the same field - created = true; - if (callBack != null && - // not a schema change if changing from ZeroVector to ZeroVector - (fieldType.getType().getTypeID() != ArrowTypeID.Null)) { - callBack.doWork(); - } - } - - if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { - final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", - fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); - throw new SchemaChangeRuntimeException(msg); + if (reader == null) { + reader = new UnionMapReader(this); } - - reader = new UnionMapReader(this); - - return new AddOrGetResult<>((T) vector, created); - } - - /** - * Promote this MapVector to a UnionVector. 
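`MapVector` no longer duplicates `addOrGetVector` and `promoteToUnion`; it sets `defaultDataVectorName` to `"entries"` and inherits the `ListVector` logic via `BaseRepeatedValueVector` above. Usage is unchanged; a sketch assuming the `UnionMapWriter` entry API implied by the imports (class name illustrative):

```java
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.impl.UnionMapWriter;

public class MapEntriesSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         MapVector vector = MapVector.empty("m", allocator, /*keysSorted=*/ false)) {
      UnionMapWriter writer = vector.getWriter();
      writer.startMap();
      writer.startEntry();
      writer.key().bigInt().writeBigInt(1L);
      writer.value().bigInt().writeBigInt(10L);
      writer.endEntry();
      writer.endMap();
      vector.setValueCount(1);

      // The struct child is created through the inherited ListVector path,
      // but with the map-specific name.
      System.out.println(vector.getDataVector().getField().getName()); // entries
    }
  }
}
```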
- * - * @return the new UnionVector. - */ - @Override - public UnionVector promoteToUnion() { - UnionVector result = super.promoteToUnion(); - reader = new UnionMapReader(this); - return result; + return (UnionMapReader)reader; } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index 1ca315a843c..1d9b8713697 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -286,6 +286,56 @@ public Object getObject(int index) { return vv; } + @Override + public int hashCode(int index) { + int hash = 0; + for (String child : getChildFieldNames()) { + ValueVector v = getChild(child); + if (v != null && index < v.getValueCount()) { + hash = 31 * hash + v.hashCode(index); + } + } + return hash; + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + if (to == null) { + return false; + } + if (this.getClass() != to.getClass()) { + return false; + } + NonNullableStructVector that = (NonNullableStructVector) to; + List leftChildrens = new ArrayList<>(); + List rightChildrens = new ArrayList<>(); + + for (String child : getChildFieldNames()) { + ValueVector v = getChild(child); + if (v != null) { + leftChildrens.add(v); + } + } + + for (String child : that.getChildFieldNames()) { + ValueVector v = that.getChild(child); + if (v != null) { + rightChildrens.add(v); + } + } + + if (leftChildrens.size() != rightChildrens.size()) { + return false; + } + + for (int i = 0; i < leftChildrens.size(); i++) { + if (!leftChildrens.get(i).equals(index, rightChildrens.get(i), toIndex)) { + return false; + } + } + return true; + } + @Override public boolean isNull(int index) { return false; @@ -372,4 +422,5 @@ public void initializeChildrenFromFields(List children) { public List getChildrenFromFields() { return getChildren(); } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index 35f50ef23a0..2ad9fb75091 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -479,9 +479,22 @@ public Object getObject(int index) { } } + @Override + public int hashCode(int index) { + if (isSet(index) == 0) { + return 0; + } else { + return super.hashCode(index); + } + } + @Override public void get(int index, ComplexHolder holder) { holder.isSet = isSet(index); + if (holder.isSet == 0) { + holder.reader = null; + return; + } super.get(index, holder); } @@ -542,4 +555,5 @@ public void setValueCount(int valueCount) { super.setValueCount(valueCount); this.valueCount = valueCount; } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index ce3948c01f9..9009040fe5f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -42,7 +42,7 @@ * specifically typed writer cannot handle.
A new UnionVector is created, wrapping the original vector, and replaces the * original vector in the parent vector, which can be either an AbstractStructVector or a ListVector. * - *
<p> - *   The writer used can either be for single elements (struct) or lists. - * </p> + * <p> + *   The writer used can either be for single elements (struct) or lists. + * </p>
    */ public class PromotableWriter extends AbstractPromotableFieldWriter { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ByteArrayWrapper.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ByteArrayWrapper.java new file mode 100644 index 00000000000..bcfac3983f3 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ByteArrayWrapper.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.dictionary; + +import java.util.Arrays; + +/** + * Wrapper class for byte array. + */ +public class ByteArrayWrapper { + private final byte[] data; + + /** + * Constructs a new instance. + */ + public ByteArrayWrapper(byte[] data) { + if (data == null) { + throw new NullPointerException(); + } + + this.data = data; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof ByteArrayWrapper)) { + return false; + } + + return Arrays.equals(data, ((ByteArrayWrapper)other).data); + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java index 1c2a0aced17..9b16bb160ea 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java @@ -17,12 +17,7 @@ package org.apache.arrow.vector.dictionary; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - +import org.apache.arrow.vector.BaseIntVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; @@ -48,11 +43,11 @@ public class DictionaryEncoder { */ public static ValueVector encode(ValueVector vector, Dictionary dictionary) { validateType(vector.getMinorType()); - // load dictionary values into a hashmap for lookup - Map lookUps = new HashMap<>(dictionary.getVector().getValueCount()); + // load dictionary indices into a hashmap for lookup + + DictionaryHashTable hashTable = new DictionaryHashTable(dictionary.getVector()); for (int i = 0; i < dictionary.getVector().getValueCount(); i++) { - // for primitive array types we need a wrapper that implements equals and hashcode appropriately - lookUps.put(dictionary.getVector().getObject(i), i); + hashTable.put(i); } Field valueField = vector.getField(); @@ -61,43 +56,27 @@ public static ValueVector encode(ValueVector vector, Dictionary dictionary) { Field indexField = new Field(valueField.getName(), 
indexFieldType, null); // vector to hold our indices (dictionary encoded values) - FieldVector indices = indexField.createVector(vector.getAllocator()); - - // use reflection to pull out the set method - // TODO implement a common interface for int vectors - Method setter = null; - for (Class c : Arrays.asList(int.class, long.class)) { - try { - setter = indices.getClass().getMethod("setSafe", int.class, c); - break; - } catch (NoSuchMethodException e) { - // ignore - } + FieldVector createdVector = indexField.createVector(vector.getAllocator()); + if (!(createdVector instanceof BaseIntVector)) { + throw new IllegalArgumentException("Dictionary encoding does not have a valid int type:" + + createdVector.getClass()); } - if (setter == null) { - throw new IllegalArgumentException("Dictionary encoding does not have a valid int type:" + indices.getClass()); - } - - int count = vector.getValueCount(); + BaseIntVector indices = (BaseIntVector) createdVector; indices.allocateNew(); - try { - for (int i = 0; i < count; i++) { - Object value = vector.getObject(i); - if (value != null) { // if it's null leave it null - // note: this may fail if value was not included in the dictionary - Object encoded = lookUps.get(value); - if (encoded == null) { - throw new IllegalArgumentException("Dictionary encoding not defined for value:" + value); - } - setter.invoke(indices, i, encoded); + int count = vector.getValueCount(); + + for (int i = 0; i < count; i++) { + if (!vector.isNull(i)) { // if it's null leave it null + // note: this may fail if value was not included in the dictionary + int encoded = hashTable.getIndex(i, vector); + if (encoded == -1) { + throw new IllegalArgumentException("Dictionary encoding not defined for value:" + vector.getObject(i)); } + indices.setWithPossibleTruncate(i, encoded); } - } catch (IllegalAccessException e) { - throw new RuntimeException("IllegalAccessException invoking vector mutator set():", e); - } catch (InvocationTargetException e) { - throw new RuntimeException("InvocationTargetException invoking vector mutator set():", e.getCause()); } indices.setValueCount(count); @@ -119,10 +98,11 @@ public static ValueVector decode(ValueVector indices, Dictionary dictionary) { // copy the dictionary values into the decoded vector TransferPair transfer = dictionaryVector.getTransferPair(indices.getAllocator()); transfer.getTo().allocateNewSafe(); + + BaseIntVector baseIntVector = (BaseIntVector) indices; for (int i = 0; i < count; i++) { - Object index = indices.getObject(i); - if (index != null) { - int indexAsInt = ((Number) index).intValue(); + if (!baseIntVector.isNull(i)) { + int indexAsInt = (int) baseIntVector.getValueAsLong(i); if (indexAsInt > dictionaryCount) { throw new IllegalArgumentException("Provided dictionary does not contain value for index " + indexAsInt); } @@ -136,11 +116,8 @@ public static ValueVector decode(ValueVector indices, Dictionary dictionary) { } private static void validateType(MinorType type) { - // byte arrays don't work as keys in our dictionary map - we could wrap them with something to - // implement equals and hashcode if we want that functionality - if (type == MinorType.VARBINARY || type == MinorType.FIXEDSIZEBINARY || type == MinorType.LIST || - type == MinorType.STRUCT || type == MinorType.UNION) { - throw new IllegalArgumentException("Dictionary encoding for complex types not implemented: type " + type); + if (type == MinorType.UNION) { + throw new IllegalArgumentException("Dictionary encoding not
implemented for current type: " + type); } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java new file mode 100644 index 00000000000..725783591d6 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.dictionary; + +import java.util.Objects; + +import org.apache.arrow.vector.ValueVector; + +/** + * HashTable used for Dictionary encoding. It holds two vectors (the vector to encode and dictionary vector) + * It stores the index in dictionary vector and for a given index in encode vector, + * it could return dictionary index. + */ +public class DictionaryHashTable { + + /** + * Represents a null value in map. + */ + static final int NULL_VALUE = -1; + + /** + * The default initial capacity - MUST be a power of two. + */ + static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; + + /** + * The maximum capacity, used if a higher value is implicitly specified + * by either of the constructors with arguments. + */ + static final int MAXIMUM_CAPACITY = 1 << 30; + + /** + * The load factor used when none specified in constructor. + */ + static final float DEFAULT_LOAD_FACTOR = 0.75f; + + static final DictionaryHashTable.Entry[] EMPTY_TABLE = {}; + + /** + * The table, initialized on first use, and resized as + * necessary. When allocated, length is always a power of two. + */ + transient DictionaryHashTable.Entry[] table = (DictionaryHashTable.Entry[]) EMPTY_TABLE; + + /** + * The number of key-value mappings contained in this map. + */ + transient int size; + + /** + * The next size value at which to resize (capacity * load factor). + */ + int threshold; + + /** + * The load factor for the hash table. + */ + final float loadFactor; + + private final ValueVector dictionary; + + /** + * Constructs an empty map with the specified initial capacity and load factor. + */ + public DictionaryHashTable(int initialCapacity, ValueVector dictionary) { + if (initialCapacity < 0) { + throw new IllegalArgumentException("Illegal initial capacity: " + + initialCapacity); + } + if (initialCapacity > MAXIMUM_CAPACITY) { + initialCapacity = MAXIMUM_CAPACITY; + } + this.loadFactor = DEFAULT_LOAD_FACTOR; + this.threshold = initialCapacity; + + this.dictionary = dictionary; + } + + public DictionaryHashTable(ValueVector dictionary) { + this(DEFAULT_INITIAL_CAPACITY, dictionary); + } + + /** + * Compute the capacity with given threshold and create init table. 
+ */ + private void inflateTable(int threshold) { + int capacity = roundUpToPowerOf2(threshold); + this.threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1); + table = new DictionaryHashTable.Entry[capacity]; + } + + /** + * Computes the storage location in an array for the given hashCode. + */ + static int indexFor(int h, int length) { + return h & (length - 1); + } + + /** + * Returns a power of two size for the given size. + */ + static final int roundUpToPowerOf2(int size) { + int n = size - 1; + n |= n >>> 1; + n |= n >>> 2; + n |= n >>> 4; + n |= n >>> 8; + n |= n >>> 16; + return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1; + } + + /** + * get the corresponding dictionary index with the given index in vector which to encode. + * @param indexInArray index in vector. + * @return dictionary vector index or -1 if no value equals. + */ + public int getIndex(int indexInArray, ValueVector toEncode) { + int hash = toEncode.hashCode(indexInArray); + int index = indexFor(hash, table.length); + for (DictionaryHashTable.Entry e = table[index]; e != null ; e = e.next) { + if ((e.hash == hash)) { + int dictIndex = e.index; + if (dictionary.equals(dictIndex, toEncode, indexInArray)) { + return dictIndex; + } + } + } + return NULL_VALUE; + } + + /** + * put the index of dictionary vector to build hash table. + */ + public void put(int indexInDictionary) { + if (table == EMPTY_TABLE) { + inflateTable(threshold); + } + + int hash = dictionary.hashCode(indexInDictionary); + int i = indexFor(hash, table.length); + for (DictionaryHashTable.Entry e = table[i]; e != null; e = e.next) { + if (e.hash == hash && e.index == indexInDictionary) { + //already has this index, return + return; + } + } + + addEntry(hash, indexInDictionary, i); + } + + /** + * Create a new Entry at the specific position of table. + */ + void createEntry(int hash, int index, int bucketIndex) { + DictionaryHashTable.Entry e = table[bucketIndex]; + table[bucketIndex] = new DictionaryHashTable.Entry(hash, index, e); + size++; + } + + /** + * Add Entry at the specified location of the table. + */ + void addEntry(int hash, int index, int bucketIndex) { + if ((size >= threshold) && (null != table[bucketIndex])) { + resize(2 * table.length); + bucketIndex = indexFor(hash, table.length); + } + + createEntry(hash, index, bucketIndex); + } + + /** + * Resize table with given new capacity. + */ + void resize(int newCapacity) { + DictionaryHashTable.Entry[] oldTable = table; + int oldCapacity = oldTable.length; + if (oldCapacity == MAXIMUM_CAPACITY) { + threshold = Integer.MAX_VALUE; + return; + } + + DictionaryHashTable.Entry[] newTable = new DictionaryHashTable.Entry[newCapacity]; + transfer(newTable); + table = newTable; + threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1); + } + + /** + * Transfer entries into new table from old table. + * @param newTable new table + */ + void transfer(DictionaryHashTable.Entry[] newTable) { + int newCapacity = newTable.length; + for (DictionaryHashTable.Entry e : table) { + while (null != e) { + DictionaryHashTable.Entry next = e.next; + int i = indexFor(e.hash, newCapacity); + e.next = newTable[i]; + newTable[i] = e; + e = next; + } + } + } + + /** + * Returns the number of mappings in this Map. + */ + public int size() { + return size; + } + + /** + * Removes all elements from this map, leaving it empty. 
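Taken together, `DictionaryHashTable` replaces the reflection-and-`HashMap` scheme in `DictionaryEncoder`: hashing and equality run element-by-element against the vectors through the new `ValueVector` methods, with no boxing, which is also what lets `validateType` accept binary and nested types now. A round-trip sketch (class name and values illustrative):

```java
import java.nio.charset.StandardCharsets;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.dictionary.Dictionary;
import org.apache.arrow.vector.dictionary.DictionaryEncoder;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.DictionaryEncoding;

public class DictionaryRoundTripSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         VarCharVector values = new VarCharVector("values", allocator);
         VarCharVector dict = new VarCharVector("dict", allocator)) {
      values.allocateNew();
      values.setSafe(0, "foo".getBytes(StandardCharsets.UTF_8));
      values.setSafe(1, "bar".getBytes(StandardCharsets.UTF_8));
      values.setSafe(2, "foo".getBytes(StandardCharsets.UTF_8));
      values.setValueCount(3);

      dict.allocateNew();
      dict.setSafe(0, "foo".getBytes(StandardCharsets.UTF_8));
      dict.setSafe(1, "bar".getBytes(StandardCharsets.UTF_8));
      dict.setValueCount(2);

      // 8-bit signed indices: encode() creates a TinyIntVector, a BaseIntVector.
      Dictionary dictionary = new Dictionary(dict,
          new DictionaryEncoding(1L, false, new ArrowType.Int(8, true)));

      try (ValueVector indices = DictionaryEncoder.encode(values, dictionary);
           ValueVector decoded = DictionaryEncoder.decode(indices, dictionary)) {
        System.out.println(indices.getObject(0)); // 0
        System.out.println(indices.getObject(1)); // 1
        System.out.println(decoded.getObject(2)); // foo
      }
    }
  }
}
```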
+ */ + public void clear() { + size = 0; + for (int i = 0; i < table.length; i++) { + table[i] = null; + } + } + + /** + * Class to keep dictionary index data within hash table. + */ + static class Entry { + //dictionary index + int index; + DictionaryHashTable.Entry next; + int hash; + + Entry(int hash, int index, DictionaryHashTable.Entry next) { + this.index = index; + this.hash = hash; + this.next = next; + } + + public final int getIndex() { + return this.index; + } + + public final boolean equals(Object o) { + if (!(o instanceof DictionaryHashTable.Entry)) { + return false; + } + DictionaryHashTable.Entry e = (DictionaryHashTable.Entry) o; + if (Objects.equals(index, e.getIndex())) { + return true; + } + return false; + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 9eb7d95ab39..35aafcba0f0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -120,6 +120,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; @@ -133,8 +134,7 @@ public enum MinorType { NULL(Null.INSTANCE) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { return ZeroVector.INSTANCE; @@ -148,11 +148,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { STRUCT(Struct.INSTANCE) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new StructVector(name, allocator, fieldType, schemaChangeCallback); + return new StructVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); } @Override @@ -163,11 +162,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TINYINT(new Int(8, true)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TinyIntVector(name, fieldType, allocator); + return new TinyIntVector(field, allocator); } @Override @@ -178,11 +176,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { SMALLINT(new Int(16, true)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new SmallIntVector(name, fieldType, allocator); + return new SmallIntVector(field, allocator); } @Override @@ -193,11 +190,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { INT(new Int(32, true)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntVector(name, fieldType, allocator); + return new IntVector(field, allocator); } @Override @@ -208,11 +204,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { BIGINT(new Int(64, true)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new BigIntVector(name, 
fieldType, allocator); + return new BigIntVector(field, allocator); } @Override @@ -223,11 +218,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { DATEDAY(new Date(DateUnit.DAY)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DateDayVector(name, fieldType, allocator); + return new DateDayVector(field, allocator); } @Override @@ -238,11 +232,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { DATEMILLI(new Date(DateUnit.MILLISECOND)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DateMilliVector(name, fieldType, allocator); + return new DateMilliVector(field, allocator); } @Override @@ -253,11 +246,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESEC(new Time(TimeUnit.SECOND, 32)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeSecVector(name, fieldType, allocator); + return new TimeSecVector(field, allocator); } @Override @@ -268,11 +260,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMEMILLI(new Time(TimeUnit.MILLISECOND, 32)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeMilliVector(name, fieldType, allocator); + return new TimeMilliVector(field, allocator); } @Override @@ -283,11 +274,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMEMICRO(new Time(TimeUnit.MICROSECOND, 64)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeMicroVector(name, fieldType, allocator); + return new TimeMicroVector(field, allocator); } @Override @@ -298,11 +288,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMENANO(new Time(TimeUnit.NANOSECOND, 64)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeNanoVector(name, fieldType, allocator); + return new TimeNanoVector(field, allocator); } @Override @@ -314,11 +303,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND, null)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampSecVector(name, fieldType, allocator); + return new TimeStampSecVector(field, allocator); } @Override @@ -330,11 +318,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, null)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMilliVector(name, fieldType, allocator); + return new TimeStampMilliVector(field, allocator); } @Override @@ -346,11 +333,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, null)) { @Override public 
FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMicroVector(name, fieldType, allocator); + return new TimeStampMicroVector(field, allocator); } @Override @@ -362,11 +348,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND, null)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampNanoVector(name, fieldType, allocator); + return new TimeStampNanoVector(field, allocator); } @Override @@ -377,11 +362,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { INTERVALDAY(new Interval(IntervalUnit.DAY_TIME)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntervalDayVector(name, fieldType, allocator); + return new IntervalDayVector(field, allocator); } @Override @@ -392,11 +376,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { DURATION(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, - BufferAllocator allocator, - CallBack schemaChangeCallback) { - return new DurationVector(name, fieldType, allocator); + Field field, + BufferAllocator allocator, + CallBack schemaChangeCallback) { + return new DurationVector(field, allocator); } @Override @@ -409,11 +392,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { INTERVALYEAR(new Interval(IntervalUnit.YEAR_MONTH)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new IntervalYearVector(name, fieldType, allocator); + return new IntervalYearVector(field, allocator); } @Override @@ -425,11 +407,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { FLOAT4(new FloatingPoint(SINGLE)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new Float4Vector(name, fieldType, allocator); + return new Float4Vector(field, allocator); } @Override @@ -441,11 +422,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { FLOAT8(new FloatingPoint(DOUBLE)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new Float8Vector(name, fieldType, allocator); + return new Float8Vector(field, allocator); } @Override @@ -456,11 +436,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { BIT(Bool.INSTANCE) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new BitVector(name, fieldType, allocator); + return new BitVector(field, allocator); } @Override @@ -471,11 +450,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { VARCHAR(Utf8.INSTANCE) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new VarCharVector(name, fieldType, allocator); + return new VarCharVector(field, allocator); } @Override @@ -486,11 +464,10 @@ public FieldWriter 
getNewFieldWriter(ValueVector vector) { VARBINARY(Binary.INSTANCE) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new VarBinaryVector(name, fieldType, allocator); + return new VarBinaryVector(field, allocator); } @Override @@ -501,11 +478,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { DECIMAL(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new DecimalVector(name, fieldType, allocator); + return new DecimalVector(field, allocator); } @Override @@ -516,11 +492,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { FIXEDSIZEBINARY(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new FixedSizeBinaryVector(name, fieldType, allocator); + return new FixedSizeBinaryVector(field, allocator); } @Override @@ -531,11 +506,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { UINT1(new Int(8, false)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt1Vector(name, fieldType, allocator); + return new UInt1Vector(field, allocator); } @Override @@ -546,11 +520,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { UINT2(new Int(16, false)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt2Vector(name, fieldType, allocator); + return new UInt2Vector(field, allocator); } @Override @@ -561,11 +534,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { UINT4(new Int(32, false)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt4Vector(name, fieldType, allocator); + return new UInt4Vector(field, allocator); } @Override @@ -576,11 +548,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { UINT8(new Int(64, false)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new UInt8Vector(name, fieldType, allocator); + return new UInt8Vector(field, allocator); } @Override @@ -591,11 +562,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { LIST(List.INSTANCE) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new ListVector(name, allocator, fieldType, schemaChangeCallback); + return new ListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); } @Override @@ -606,11 +576,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { FIXED_SIZE_LIST(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new FixedSizeListVector(name, allocator, fieldType, schemaChangeCallback); + return new FixedSizeListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); } @Override @@ -622,15 +591,14 @@ public FieldWriter 
getNewFieldWriter(ValueVector vector) { UNION(new Union(Sparse, null)) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - if (fieldType.getDictionary() != null) { + if (field.getFieldType().getDictionary() != null) { throw new UnsupportedOperationException("Dictionary encoding not supported for complex " + - "types"); + "types"); } - return new UnionVector(name, allocator, schemaChangeCallback); + return new UnionVector(field.getName(), allocator, schemaChangeCallback); } @Override @@ -641,11 +609,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { MAP(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new MapVector(name, allocator, fieldType, schemaChangeCallback); + return new MapVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); } @Override @@ -656,11 +623,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPSECTZ(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampSecTZVector(name, fieldType, allocator); + return new TimeStampSecTZVector(field, allocator); } @Override @@ -671,11 +637,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPMILLITZ(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMilliTZVector(name, fieldType, allocator); + return new TimeStampMilliTZVector(field, allocator); } @Override @@ -686,11 +651,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPMICROTZ(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampMicroTZVector(name, fieldType, allocator); + return new TimeStampMicroTZVector(field, allocator); } @Override @@ -701,11 +665,10 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { TIMESTAMPNANOTZ(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return new TimeStampNanoTZVector(name, fieldType, allocator); + return new TimeStampNanoTZVector(field, allocator); } @Override @@ -716,11 +679,11 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { EXTENSIONTYPE(null) { @Override public FieldVector getNewVector( - String name, - FieldType fieldType, + Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { - return ((ExtensionType) fieldType.getType()).getNewVector(name, fieldType, allocator); + return ((ExtensionType) field.getFieldType().getType()).getNewVector(field.getName(), + field.getFieldType(), allocator); } @Override @@ -747,10 +710,18 @@ public final ArrowType getType() { } /** Constructs a new vector for the given type. */ - public abstract FieldVector getNewVector( + public final FieldVector getNewVector( String name, FieldType fieldType, BufferAllocator allocator, + CallBack schemaChangeCallback) { + return getNewVector(new Field(name, fieldType, null), allocator, schemaChangeCallback); + } + + /** Constructs a new vector for the given type. 
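+ * <p>For example (names illustrative), given some {@code BufferAllocator allocator},
+ * these two calls build the same {@code IntVector}, because the (name, FieldType)
+ * overload above simply wraps its arguments in {@code new Field(name, fieldType, null)}
+ * and delegates here:
+ * {@code MinorType.INT.getNewVector("i", FieldType.nullable(MinorType.INT.getType()), allocator, null)} and
+ * {@code MinorType.INT.getNewVector(new Field("i", FieldType.nullable(MinorType.INT.getType()), null), allocator, null)}.</p>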
*/ + public abstract FieldVector getNewVector( + Field field, + BufferAllocator allocator, CallBack schemaChangeCallback); public abstract FieldWriter getNewFieldWriter(ValueVector vector); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java index 2bbb52dea98..8d41b92d867 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java @@ -37,9 +37,9 @@ public class DictionaryEncoding { /** * Constructs a new instance. * - * @param id The ID of the dictionary to use for iecnoding. + * @param id The ID of the dictionary to use for encoding. * @param ordered Whether the keys in values in the dictionary are ordered. - * @param indexType (nullable). The integer type to use for indexing in the dictionary. Defaults to a signed + * @param indexType (nullable). The integer type to use for indexing in the dictionary. Defaults to a signed * 32 bit integer. */ @JsonCreator diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 99ceb6a0f99..c35e84449e3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -108,7 +108,7 @@ public Field(String name, FieldType fieldType, List<Field> children) { * Construct a new vector of this type using the given allocator. */ public FieldVector createVector(BufferAllocator allocator) { - FieldVector vector = fieldType.createNewSingleVector(name, allocator, null); + FieldVector vector = fieldType.createNewSingleVector(this, allocator, null); vector.initializeChildrenFromFields(children); return vector; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java index 4cc4067c997..945f5df2d98 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java @@ -98,4 +98,9 @@ public FieldVector createNewSingleVector(String name, BufferAllocator allocator, return minorType.getNewVector(name, this, allocator, schemaCallBack); } + public FieldVector createNewSingleVector(Field field, BufferAllocator allocator, CallBack schemaCallBack) { + MinorType minorType = Types.getMinorTypeForArrowType(type); + return minorType.getNewVector(field, allocator, schemaCallBack); + } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ByteFunctionHelpers.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ByteFunctionHelpers.java index 8140103bfc4..8dbdc4987ce 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ByteFunctionHelpers.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ByteFunctionHelpers.java @@ -39,7 +39,7 @@ private ByteFunctionHelpers() {} * @param right Right ArrowBuf for comparison * @param rStart start offset in the buffer * @param rEnd end offset in the buffer - * @return 1 if left input is greater, -1 if left input is smaller, 0 otherwise + * @return 1 if the two ranges are equal, 0 otherwise */ public static final int equal(final ArrowBuf left, int lStart, int lEnd, final ArrowBuf right, int rStart, int rEnd) { if (BoundsChecking.BOUNDS_CHECKING_ENABLED)
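// Both equal()/memEqual() and the compare()/memcmp() helpers below walk the two
// ranges a word at a time: 8-byte longs first, then a single 4-byte int, then any
// trailing bytes. hash() folds each word in through combineHash, i.e.
// currentHash * 31 + newHash, so for instance a two-byte buffer {1, 2} hashes to
// (0 * 31 + 1) * 31 + 2 = 33.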
{ @@ -49,6 +49,48 @@ public static final int equal(final ArrowBuf left, int lStart, int lEnd, final A return memEqual(left.memoryAddress(), lStart, lEnd, right.memoryAddress(), rStart, rEnd); } + /** + * Compute hashCode with the given {@link ArrowBuf} and start/end index. + */ + public static final int hash(final ArrowBuf buf, int start, int end) { + long addr = buf.memoryAddress(); + int len = end - start; + long pos = addr + start; + + int hash = 0; + + while (len > 7) { + long value = PlatformDependent.getLong(pos); + hash = combineHash(hash, Long.hashCode(value)); + + pos += 8; + len -= 8; + } + + while (len > 3) { + int value = PlatformDependent.getInt(pos); + hash = combineHash(hash, value); + + pos += 4; + len -= 4; + } + + while (len-- != 0) { + byte value = PlatformDependent.getByte(pos); + hash = combineHash(hash, value); + pos++; + } + + return hash; + } + + /** + * Combine the current hash code with the hash code of a newly read value. + */ + public static int combineHash(int currentHash, int newHash) { + return currentHash * 31 + newHash; + } + private static int memEqual(final long laddr, int lStart, int lEnd, final long raddr, int rStart, final int rEnd) { @@ -67,6 +109,18 @@ private static int memEqual(final long laddr, int lStart, int lEnd, final long r rPos += 8; n -= 8; } + + while (n > 3) { + int leftInt = PlatformDependent.getInt(lPos); + int rightInt = PlatformDependent.getInt(rPos); + if (leftInt != rightInt) { + return 0; + } + lPos += 4; + rPos += 4; + n -= 4; + } + while (n-- != 0) { byte leftByte = PlatformDependent.getByte(lPos); byte rightByte = PlatformDependent.getByte(rPos); @@ -126,13 +180,24 @@ private static int memcmp( long leftLong = PlatformDependent.getLong(lPos); long rightLong = PlatformDependent.getLong(rPos); if (leftLong != rightLong) { - return unsignedLongCompare(Long.reverseBytes(leftLong), Long.reverseBytes(rightLong)); + return unsignedLongCompare(leftLong, rightLong); } lPos += 8; rPos += 8; n -= 8; } + while (n > 3) { + int leftInt = PlatformDependent.getInt(lPos); + int rightInt = PlatformDependent.getInt(rPos); + if (leftInt != rightInt) { + return unsignedIntCompare(leftInt, rightInt); + } + lPos += 4; + rPos += 4; + n -= 4; + } + while (n-- != 0) { byte leftByte = PlatformDependent.getByte(lPos); byte rightByte = PlatformDependent.getByte(rPos); @@ -189,6 +254,9 @@ public static int unsignedLongCompare(long a, long b) { return Long.compare(a ^ Long.MIN_VALUE, b ^ Long.MIN_VALUE); } + public static int unsignedIntCompare(int a, int b) { + return Integer.compare(a ^ Integer.MIN_VALUE, b ^ Integer.MIN_VALUE); + } private static int memcmp( final long laddr, diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java index d091152f64b..a9f7a179228 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java @@ -31,6 +31,8 @@ public class DecimalUtility { private DecimalUtility() {} public static final int DECIMAL_BYTE_LENGTH = 16; + public static final byte[] zeroes = new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + public static final byte[] minus_one = new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; /** * Read an ArrowType.Decimal at the given value index in the ArrowBuf and convert to a BigDecimal @@ -100,8 +102,7 @@ public static boolean checkPrecisionAndScale(BigDecimal value, int vectorPrecisi */
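// A worked example of the little-endian write and sign extension implemented below
// (values illustrative): BigDecimal 260 has unscaled big-endian bytes {1, 4}, which
// are reversed to {4, 1} and padded with fourteen bytes from zeroes; BigDecimal -1
// ({-1}) is padded from minus_one instead, leaving sixteen 0xFF bytes, the 128-bit
// two's-complement encoding of -1.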
public static void writeBigDecimalToArrowBuf(BigDecimal value, ArrowBuf bytebuf, int index) { final byte[] bytes = value.unscaledValue().toByteArray(); - final int padValue = value.signum() == -1 ? 0xFF : 0; - writeByteArrayToArrowBuf(bytes, bytebuf, index, padValue); + writeByteArrayToArrowBufHelper(bytes, bytebuf, index); } /** @@ -120,10 +121,10 @@ public static void writeLongToArrowBuf(long value, ArrowBuf bytebuf, int index) * width. */ public static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int index) { - writeByteArrayToArrowBuf(bytes, bytebuf, index, 0); + writeByteArrayToArrowBufHelper(bytes, bytebuf, index); } - private static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int index, int padValue) { + private static void writeByteArrayToArrowBufHelper(byte[] bytes, ArrowBuf bytebuf, int index) { final int startIndex = index * DECIMAL_BYTE_LENGTH; if (bytes.length > DECIMAL_BYTE_LENGTH) { throw new UnsupportedOperationException("Decimal size greater than 16 bytes"); @@ -131,23 +132,13 @@ private static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int // Decimal stored as little endian, need to swap data bytes before writing to ArrowBuf byte[] bytesLE = new byte[bytes.length]; - int stop = bytes.length / 2; - for (int i = 0, j; i < stop; i++) { - j = (bytes.length - 1) - i; - bytesLE[i] = bytes[j]; - bytesLE[j] = bytes[i]; - } - if (bytes.length % 2 != 0) { - int i = (bytes.length / 2); - bytesLE[i] = bytes[i]; + for (int i = 0; i < bytes.length; i++) { + bytesLE[i] = bytes[bytes.length - 1 - i]; } // Write LE data + byte[] padBytes = bytes[0] < 0 ? minus_one : zeroes; bytebuf.setBytes(startIndex, bytesLE, 0, bytes.length); - - // Write padding after data - for (int i = bytes.length; i < DECIMAL_BYTE_LENGTH; i++) { - bytebuf.setByte(startIndex + i, padValue); - } + bytebuf.setBytes(startIndex + bytes.length, padBytes, 0, DECIMAL_BYTE_LENGTH - bytes.length); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java index f62371d7525..9d52427f536 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java @@ -18,12 +18,16 @@ package org.apache.arrow.vector; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.memory.RootAllocator; import org.junit.Test; import io.netty.buffer.ArrowBuf; import io.netty.buffer.PooledByteBufAllocatorL; +import io.netty.util.internal.PlatformDependent; public class TestBitVectorHelper { @Test @@ -63,4 +67,86 @@ public void testGetNullCount() throws Exception { count = BitVectorHelper.getNullCount(validityBuffer, 11); assertEquals(count, 5); } + + @Test + public void testAllBitsNull() { + final int bufferLength = 32 * 1024; + try (RootAllocator allocator = new RootAllocator(bufferLength); + ArrowBuf validityBuffer = allocator.buffer(bufferLength)) { + + validityBuffer.setZero(0, bufferLength); + int bitLength = 1024; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + bitLength = 1027; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 12,
1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1026; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1027; + BitVectorHelper.setValidityBit(validityBuffer, 1025, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + + validityBuffer.setZero(0, bufferLength); + bitLength = 1031; + BitVectorHelper.setValidityBit(validityBuffer, 1029, 1); + BitVectorHelper.setValidityBit(validityBuffer, 1030, 1); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false)); + } + } + + @Test + public void testAllBitsSet() { + final int bufferLength = 32 * 1024; + try (RootAllocator allocator = new RootAllocator(bufferLength); + ArrowBuf validityBuffer = allocator.buffer(bufferLength)) { + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + int bitLength = 1024; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + bitLength = 1028; + assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 12, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1025; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1026; + BitVectorHelper.setValidityBit(validityBuffer, 1024, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1027; + BitVectorHelper.setValidityBit(validityBuffer, 1025, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + + PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1); + bitLength = 1031; + BitVectorHelper.setValidityBit(validityBuffer, 1029, 0); + BitVectorHelper.setValidityBit(validityBuffer, 1030, 0); + assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true)); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index a04326b666f..0d2bce9f3f1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -17,15 +17,24 @@ package org.apache.arrow.vector; +import static org.apache.arrow.vector.TestUtils.newVarBinaryVector; import static org.apache.arrow.vector.TestUtils.newVarCharVector; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.nio.charset.StandardCharsets; +import 
java.util.Arrays; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.impl.NullableStructWriter; +import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryEncoder; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -142,4 +151,181 @@ public void testEncodeLargeVector() { } } } + + private void writeListVector(UnionListWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } + + @Test + public void testEncodeList() { + // Create a new value vector + try (final ListVector vector = ListVector.empty("vector", allocator); + final ListVector dictionaryVector = ListVector.empty("dict", allocator);) { + + UnionListWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writeListVector(writer, new int[]{10, 20}); + writeListVector(writer, new int[]{10, 20}); + writeListVector(writer, new int[]{10, 20}); + writeListVector(writer, new int[]{30, 40, 50}); + writeListVector(writer, new int[]{30, 40, 50}); + writeListVector(writer, new int[]{10, 20}); + + writer.setValueCount(6); + + UnionListWriter dictWriter = dictionaryVector.getWriter(); + dictWriter.allocate(); + + writeListVector(dictWriter, new int[]{10, 20}); + writeListVector(dictWriter, new int[]{30, 40, 50}); + + dictWriter.setValueCount(2); + + Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + + try (final ValueVector encoded = (FieldVector) DictionaryEncoder.encode(vector, dictionary)) { + // verify indices + assertEquals(IntVector.class, encoded.getClass()); + + IntVector index = ((IntVector)encoded); + assertEquals(6, index.getValueCount()); + assertEquals(0, index.get(0)); + assertEquals(0, index.get(1)); + assertEquals(0, index.get(2)); + assertEquals(1, index.get(3)); + assertEquals(1, index.get(4)); + assertEquals(0, index.get(5)); + + // now run through the decoder and verify we get the original back + try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { + assertEquals(vector.getClass(), decoded.getClass()); + assertEquals(vector.getValueCount(), decoded.getValueCount()); + for (int i = 0; i < 5; i++) { + assertEquals(vector.getObject(i), decoded.getObject(i)); + } + } + } + } + } + + private void writeStructVector(NullableStructWriter writer, int value1, long value2) { + writer.start(); + writer.integer("f0").writeInt(value1); + writer.bigInt("f1").writeBigInt(value2); + writer.end(); + } + + @Test + public void testEncodeStruct() { + // Create a new value vector + try (final StructVector vector = StructVector.empty("vector", allocator); + final StructVector dictionaryVector = StructVector.empty("dict", allocator);) { + vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + dictionaryVector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dictionaryVector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + + 
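// Both vectors declare the same f0/f1 children above on purpose: encode() matches
+ // whole struct rows against the dictionary, and decode() rebuilds them from it.
+ 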
NullableStructWriter writer = vector.getWriter(); + writer.allocate(); + + writeStructVector(writer, 1, 10L); + writeStructVector(writer, 1, 10L); + writeStructVector(writer, 1, 10L); + writeStructVector(writer, 2, 20L); + writeStructVector(writer, 2, 20L); + writeStructVector(writer, 2, 20L); + writeStructVector(writer, 1, 10L); + + writer.setValueCount(7); + + NullableStructWriter dictWriter = dictionaryVector.getWriter(); + dictWriter.allocate(); + + writeStructVector(dictWriter, 1, 10L); + writeStructVector(dictWriter, 2, 20L); + + + dictionaryVector.setValueCount(2); + + Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + + try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { + // verify indices + assertEquals(IntVector.class, encoded.getClass()); + + IntVector index = ((IntVector)encoded); + assertEquals(7, index.getValueCount()); + assertEquals(0, index.get(0)); + assertEquals(0, index.get(1)); + assertEquals(0, index.get(2)); + assertEquals(1, index.get(3)); + assertEquals(1, index.get(4)); + assertEquals(1, index.get(5)); + assertEquals(0, index.get(6)); + + // now run through the decoder and verify we get the original back + try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { + assertEquals(vector.getClass(), decoded.getClass()); + assertEquals(vector.getValueCount(), decoded.getValueCount()); + for (int i = 0; i < 5; i++) { + assertEquals(vector.getObject(i), decoded.getObject(i)); + } + } + } + } + } + + @Test + public void testEncodeBinaryVector() { + // Create a new value vector + try (final VarBinaryVector vector = newVarBinaryVector("foo", allocator); + final VarBinaryVector dictionaryVector = newVarBinaryVector("dict", allocator);) { + vector.allocateNew(512, 5); + + // set some values + vector.setSafe(0, zero, 0, zero.length); + vector.setSafe(1, one, 0, one.length); + vector.setSafe(2, one, 0, one.length); + vector.setSafe(3, two, 0, two.length); + vector.setSafe(4, zero, 0, zero.length); + vector.setValueCount(5); + + // set some dictionary values + dictionaryVector.allocateNew(512, 3); + dictionaryVector.setSafe(0, zero, 0, zero.length); + dictionaryVector.setSafe(1, one, 0, one.length); + dictionaryVector.setSafe(2, two, 0, two.length); + dictionaryVector.setValueCount(3); + + Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + + try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) { + // verify indices + assertEquals(IntVector.class, encoded.getClass()); + + IntVector index = ((IntVector)encoded); + assertEquals(5, index.getValueCount()); + assertEquals(0, index.get(0)); + assertEquals(1, index.get(1)); + assertEquals(1, index.get(2)); + assertEquals(2, index.get(3)); + assertEquals(0, index.get(4)); + + // now run through the decoder and verify we get the original back + try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dictionary)) { + assertEquals(vector.getClass(), decoded.getClass()); + assertEquals(vector.getValueCount(), decoded.getValueCount()); + for (int i = 0; i < 5; i++) { + assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i))); + } + } + } + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java index 14476a1c4ff..d834d03395f 100644 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java @@ -258,4 +258,10 @@ public void setSetSafeWithInvalidInput() throws Exception { vector.setSafe(0, largeNullableHolder); vector.setSafe(0, largeBuf); } + + @Test + public void testGetNull() { + vector.setNull(0); + assertNull(vector.get(0)); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 68102b1c32a..96372c0f4ec 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -135,7 +135,7 @@ public void testSetLastSetUsage() throws Exception { BigIntVector dataVector = (BigIntVector) listVector.getDataVector(); /* check current lastSet */ - assertEquals(Integer.toString(0), Integer.toString(listVector.getLastSet())); + assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet())); int index = 0; int offset = 0; @@ -165,7 +165,7 @@ public void testSetLastSetUsage() throws Exception { offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 8); /* check current lastSet */ - assertEquals(Integer.toString(0), Integer.toString(listVector.getLastSet())); + assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet())); /* set lastset and arbitrary valuecount for list vector. * @@ -206,7 +206,7 @@ public void testSetLastSetUsage() throws Exception { * [15, 16, 17] * } */ - listVector.setLastSet(3); + listVector.setLastSet(2); listVector.setValueCount(10); /* (3+2+3)/10 */ @@ -307,7 +307,7 @@ public void testSplitAndTransfer() throws Exception { listVector.setValueCount(5); - assertEquals(5, listVector.getLastSet()); + assertEquals(4, listVector.getLastSet()); /* get offset buffer */ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -501,7 +501,7 @@ public void testNestedListVector() throws Exception { listWriter.endList(); - assertEquals(2, listVector.getLastSet()); + assertEquals(1, listVector.getLastSet()); listVector.setValueCount(2); @@ -635,7 +635,7 @@ public void testNestedListVector2() throws Exception { listWriter.endList(); - assertEquals(2, listVector.getLastSet()); + assertEquals(1, listVector.getLastSet()); listVector.setValueCount(2); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index a16390c2e23..55a0a41bab5 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -324,7 +324,7 @@ public void testSplitAndTransfer() throws Exception { mapVector.setValueCount(5); - assertEquals(5, mapVector.getLastSet()); + assertEquals(4, mapVector.getLastSet()); /* get offset buffer */ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer(); @@ -552,7 +552,7 @@ public void testMapWithListValue() throws Exception { mapWriter.endMap(); - assertEquals(2, mapVector.getLastSet()); + assertEquals(1, mapVector.getLastSet()); mapWriter.setValueCount(2); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 7706be4286d..9d156eb9a2d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ 
b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -18,15 +18,20 @@ package org.apache.arrow.vector; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import java.util.HashMap; import java.util.Map; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.holders.ComplexHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.FieldType; + import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -100,4 +105,31 @@ public void testAllocateAfterReAlloc() throws Exception { Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); } } + + @Test + public void testReadNullValue() { + Map<String, String> metadata = new HashMap<>(); + metadata.put("k1", "v1"); + FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); + try (StructVector vector = new StructVector("struct", allocator, type, null)) { + MinorType childtype = MinorType.INT; + vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class); + vector.setValueCount(2); + + IntVector intVector = (IntVector) vector.getChild("intchild"); + intVector.setSafe(0, 100); + vector.setIndexDefined(0); + intVector.setNull(1); + vector.setNull(1); + + ComplexHolder holder = new ComplexHolder(); + vector.get(0, holder); + assertNotEquals(0, holder.isSet); + assertNotNull(holder.reader); + + vector.get(1, holder); + assertEquals(0, holder.isSet); + assertNull(holder.reader); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 0c1ae548667..f8f9c7671ad 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -906,13 +906,7 @@ public void testNullableVarType1() { - // Ensure null value throws. + // Ensure a null value reads back as null. - boolean b = false; - try { - vector.get(8); - } catch (IllegalStateException e) { - b = true; - } finally { - assertTrue(b); - } + assertNull(vector.get(8)); } } @@ -942,14 +936,7 @@ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); - // Ensure null value throws. + // Ensure a null value reads back as null.
- boolean b = false; - try { - vector.get(7); - } catch (IllegalStateException e) { - b = true; - } finally { - assertTrue(b); - } + assertNull(vector.get(7)); } } @@ -2173,4 +2160,19 @@ public void testSetNullableVarBinaryHolderSafe() { buf.close(); } } + + @Test + public void testGetNullFromVariableWidthVector() { + try (VarCharVector varCharVector = new VarCharVector("varcharvec", allocator); + VarBinaryVector varBinaryVector = new VarBinaryVector("varbinary", allocator)) { + varCharVector.allocateNew(10, 1); + varBinaryVector.allocateNew(10, 1); + + varCharVector.setNull(0); + varBinaryVector.setNull(0); + + assertNull(varCharVector.get(0)); + assertNull(varBinaryVector.get(0)); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java new file mode 100644 index 00000000000..089f1f84ff8 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.Collections; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; +import org.apache.arrow.vector.types.pojo.ArrowType.Duration; +import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary; +import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestVectorAlloc { + private BufferAllocator rootAllocator; + + @Before + public void init() { + rootAllocator = new RootAllocator(Long.MAX_VALUE); + } + + @After + public void terminate() throws Exception { + rootAllocator.close(); + } + + private static Field field(String name, ArrowType type) { + return new Field(name, new FieldType(true, type, null), Collections.emptyList()); + } + + @Test + public void testVectorAllocWithField() { + Schema schema = new Schema(Arrays.asList( + field("TINYINT", MinorType.TINYINT.getType()), + field("SMALLINT", MinorType.SMALLINT.getType()), + field("INT", MinorType.INT.getType()), + field("BIGINT", MinorType.BIGINT.getType()), + field("UINT1", MinorType.UINT1.getType()), + field("UINT2", MinorType.UINT2.getType()), + field("UINT4", MinorType.UINT4.getType()), + field("UINT8", MinorType.UINT8.getType()), + field("FLOAT4", MinorType.FLOAT4.getType()), + field("FLOAT8", MinorType.FLOAT8.getType()), + field("UTF8", MinorType.VARCHAR.getType()), + field("VARBINARY", MinorType.VARBINARY.getType()), + field("BIT", MinorType.BIT.getType()), + field("DECIMAL", new Decimal(38, 5)), + field("FIXEDSIZEBINARY", new FixedSizeBinary(50)), + field("DATEDAY", MinorType.DATEDAY.getType()), + field("DATEMILLI", MinorType.DATEMILLI.getType()), + field("TIMESEC", MinorType.TIMESEC.getType()), + field("TIMEMILLI", MinorType.TIMEMILLI.getType()), + field("TIMEMICRO", MinorType.TIMEMICRO.getType()), + field("TIMENANO", MinorType.TIMENANO.getType()), + field("TIMESTAMPSEC", MinorType.TIMESTAMPSEC.getType()), + field("TIMESTAMPMILLI", MinorType.TIMESTAMPMILLI.getType()), + field("TIMESTAMPMICRO", MinorType.TIMESTAMPMICRO.getType()), + field("TIMESTAMPNANO", MinorType.TIMESTAMPNANO.getType()), + field("TIMESTAMPSECTZ", new Timestamp(TimeUnit.SECOND, "PST")), + field("TIMESTAMPMILLITZ", new Timestamp(TimeUnit.MILLISECOND, "PST")), + field("TIMESTAMPMICROTZ", new Timestamp(TimeUnit.MICROSECOND, "PST")), + field("TIMESTAMPNANOTZ", new Timestamp(TimeUnit.NANOSECOND, "PST")), + field("INTERVALDAY", MinorType.INTERVALDAY.getType()), + field("INTERVALYEAR", MinorType.INTERVALYEAR.getType()), + field("DURATION", new Duration(TimeUnit.MILLISECOND)) + )); + + try (BufferAllocator allocator = rootAllocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { + for (Field field : schema.getFields()) { + try (FieldVector vector = field.createVector(allocator)) { + assertEquals(vector.getMinorType(), + Types.getMinorTypeForArrowType(field.getFieldType().getType())); + vector.allocateNew(); + } + } + } + } +} diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index f8643b8af71..93ebf6c4ca8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -108,7 +108,7 @@ public void testListTypeReset() { variableList.endValue(0, 0); variableList.setValueCount(1); resetVectorAndVerify(variableList, variableList.getBuffers(false)); - assertEquals(0, variableList.getLastSet()); + assertEquals(-1, variableList.getLastSet()); // FixedSizeListVector fixedList.allocateNewSafe(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 480dcaca0f4..f9525f45c1a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -17,10 +17,24 @@ package org.apache.arrow.vector; +import static junit.framework.TestCase.assertTrue; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; + import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -75,4 +89,83 @@ private void checkCount(BitVector vec1, IntVector vec2, VectorSchemaRoot vsr, in assertEquals(vec2.getValueCount(), count); assertEquals(vsr.getRowCount(), count); } + + private VectorSchemaRoot createBatch() { + FieldType varCharType = new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null); + FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null); + + // create the schema + List<Field> schemaFields = new ArrayList<>(); + Field childField = new Field("varCharCol", varCharType, null); + List<Field> childFields = new ArrayList<>(); + childFields.add(childField); + schemaFields.add(new Field("listCol", listType, childFields)); + Schema schema = new Schema(schemaFields); + + VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator); + // get and allocate the vector + ListVector vector = (ListVector) schemaRoot.getVector("listCol"); + vector.allocateNew(); + + // write data to the vector + UnionListWriter writer = vector.getWriter(); + + writer.setPosition(0); + + // write data vector(0) + writer.startList(); + + // write data vector(0)(0) + writer.list().startList(); + + // According to the schema above, the list element should have varchar type. + // When we write a big int, the original writer cannot handle this, so the writer will + // be promoted, and the vector structure will be different from the schema.
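+ // Concretely: the child declared as Utf8 is promoted to a union once the BigInt
+ // and Float8 values below are written, so vector.getField() diverges from the
+ // original Schema until syncSchema() re-derives it from the actual vectors.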
+ writer.list().bigInt().writeBigInt(0); + writer.list().bigInt().writeBigInt(1); + writer.list().endList(); + + // write data vector(0)(1) + writer.list().startList(); + writer.list().float8().writeFloat8(3.0D); + writer.list().float8().writeFloat8(7.0D); + writer.list().endList(); + + // finish data vector(0) + writer.endList(); + + writer.setPosition(1); + + // write data vector(1) + writer.startList(); + + // write data vector(1)(0) + writer.list().startList(); + writer.list().integer().writeInt(3); + writer.list().integer().writeInt(2); + writer.list().endList(); + + // finish data vector(1) + writer.endList(); + + vector.setValueCount(2); + + return schemaRoot; + } + + @Test + public void testSchemaSync() { + // create vector schema root + try (VectorSchemaRoot schemaRoot = createBatch()) { + Schema newSchema = new Schema( + schemaRoot.getFieldVectors().stream().map(vec -> vec.getField()).collect(Collectors.toList())); + + assertNotEquals(newSchema, schemaRoot.getSchema()); + assertTrue(schemaRoot.syncSchema()); + assertEquals(newSchema, schemaRoot.getSchema()); + + // no schema update this time. + assertFalse(schemaRoot.syncSchema()); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java index 83d15d1e3a6..21b9c447f42 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java @@ -643,7 +643,7 @@ protected void validateVarBinary(int count, VectorSchemaRoot root) { } - // ListVector lastSet should be the index of last value + 1 + // ListVector lastSet should be the index of the last value - Assert.assertEquals(listVector.getLastSet(), count); + Assert.assertEquals(listVector.getLastSet(), count - 1); // VarBinaryVector lastSet should be the index of last value VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 20d270c8988..792bd29903b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -34,6 +34,7 @@ import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.ipc.ArrowFileReader; import org.apache.arrow.vector.ipc.ArrowFileWriter; @@ -204,6 +205,16 @@ public UUID getObject(int index) { return new UUID(bb.getLong(), bb.getLong()); } + @Override + public int hashCode(int index) { + return getUnderlyingVector().hashCode(index); + } + + @Override + public boolean equals(int index, ValueVector to, int toIndex) { + return getUnderlyingVector().equals(index, to, toIndex); + } + public void set(int index, UUID uuid) { ByteBuffer bb = ByteBuffer.allocate(16); bb.putLong(uuid.getMostSignificantBits()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java new file mode 100644 index 00000000000..1840b4e707a --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Assert; +import org.junit.Test; + +import io.netty.buffer.ArrowBuf; + +public class DecimalUtilityTest { + private static final BigInteger MAX_BIG_INT = java.math.BigInteger.valueOf(10).pow(38) + .subtract(java.math.BigInteger.ONE); + private static final BigDecimal MAX_DECIMAL = new java.math.BigDecimal(MAX_BIG_INT, 0); + private static final BigInteger MIN_BIG_INT = MAX_BIG_INT.multiply(BigInteger.valueOf(-1)); + private static final BigDecimal MIN_DECIMAL = new java.math.BigDecimal(MIN_BIG_INT, 0); + + @Test + public void testSetByteArrayInDecimalArrowBuf() { + try (BufferAllocator allocator = new RootAllocator(128); + ArrowBuf buf = allocator.buffer(16); + ) { + int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0}; + for (int val : intValues) { + buf.clear(); + DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0); + BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0); + BigDecimal expected = BigDecimal.valueOf(val); + Assert.assertEquals(expected, actual); + } + + long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE}; + for (long val : longValues) { + buf.clear(); + DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0); + BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0); + BigDecimal expected = BigDecimal.valueOf(val); + Assert.assertEquals(expected, actual); + } + + BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT, new BigInteger("0"), MIN_BIG_INT}; + for (BigInteger val : decimals) { + buf.clear(); + DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0); + BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0); + BigDecimal expected = new BigDecimal(val); + Assert.assertEquals(expected, actual); + } + } + } + + @Test + public void testSetBigDecimalInDecimalArrowBuf() { + try (BufferAllocator allocator = new RootAllocator(128); + ArrowBuf buf = allocator.buffer(16); + ) { + int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0}; + for (int val : intValues) { + buf.clear(); + DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0); + BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0); + BigDecimal expected = BigDecimal.valueOf(val); + Assert.assertEquals(expected, actual); + } + + long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE}; + for (long val : longValues) { + buf.clear(); + DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0); + 
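// Round-trip check: whatever was written at slot 0 must read back unchanged.
+ 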
BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0); + BigDecimal expected = BigDecimal.valueOf(val); + Assert.assertEquals(expected, actual); + } + + BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT, new BigInteger("0"), MIN_BIG_INT}; + for (BigInteger val : decimals) { + buf.clear(); + DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0); + BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0); + BigDecimal expected = new BigDecimal(val); + Assert.assertEquals(expected, actual); + } + } + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestByteFunctionHelpers.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestByteFunctionHelpers.java new file mode 100644 index 00000000000..cb879278af8 --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestByteFunctionHelpers.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import static org.junit.Assert.assertEquals; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import io.netty.buffer.ArrowBuf; + +public class TestByteFunctionHelpers { + + private BufferAllocator allocator; + + private static final int SIZE = 100; + + @Before + public void init() { + allocator = new RootAllocator(Long.MAX_VALUE); + + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testEquals() { + ArrowBuf buffer1 = allocator.buffer(SIZE); + ArrowBuf buffer2 = allocator.buffer(SIZE); + + for (int i = 0; i < SIZE; i++) { + buffer1.setByte(i, i); + buffer2.setByte(i, i); + } + + //test three cases, length>8, length>3, length<3 + + assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, SIZE - 1, + buffer2, 0, SIZE - 1)); + assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, 6, + buffer2, 0, 6)); + assertEquals(1, ByteFunctionHelpers.equal(buffer1, 0, 2, + buffer2, 0, 2)); + + //change value at index1 + buffer1.setByte(1, 10); + + assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, SIZE - 1, + buffer2, 0, SIZE - 1)); + assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, 6, + buffer2, 0, 6)); + assertEquals(0, ByteFunctionHelpers.equal(buffer1, 0, 2, + buffer2, 0, 2)); + + buffer1.close(); + buffer2.close(); + + } + + @Test + public void testCompare() { + ArrowBuf buffer1 = allocator.buffer(SIZE); + ArrowBuf buffer2 = allocator.buffer(SIZE); + + for (int i = 0; i < SIZE; i++) { + buffer1.setByte(i, i); + buffer2.setByte(i, i); + } + + //test three cases, length>8, length>3, length<3 + + assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1, + buffer2, 0, 
SIZE - 1)); + assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 6, + buffer2, 0, 6)); + assertEquals(0, ByteFunctionHelpers.compare(buffer1, 0, 2, + buffer2, 0, 2)); + + //change value at index 1 + buffer1.setByte(1, 0); + + assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, SIZE - 1, + buffer2, 0, SIZE - 1)); + assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 6, + buffer2, 0, 6)); + assertEquals(-1, ByteFunctionHelpers.compare(buffer1, 0, 2, + buffer2, 0, 2)); + + buffer1.close(); + buffer2.close(); + + } +} diff --git a/js/package.json b/js/package.json index 41d1d6dd83f..11c7c89aad8 100644 --- a/js/package.json +++ b/js/package.json @@ -113,5 +113,5 @@ "engines": { "node": ">=11.12" }, - "version": "0.14.0" + "version": "1.0.0-SNAPSHOT" } diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index 5c6ec356ba8..2800beb3272 100755 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -18,7 +18,7 @@ cmake_minimum_required(VERSION 3.2) set(CMAKE_CXX_STANDARD 11) -set(MLARROW_VERSION "0.14.0") +set(MLARROW_VERSION "1.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") diff --git a/matlab/src/feather_reader.cc b/matlab/src/feather_reader.cc index 1c1b21cd66b..484c300e0e4 100644 --- a/matlab/src/feather_reader.cc +++ b/matlab/src/feather_reader.cc @@ -37,46 +37,38 @@ namespace matlab { namespace internal { // Read the name of variable i from the Feather file as a mxArray*. -mxArray* ReadVariableName(const std::shared_ptr& column) { - return matlab::util::ConvertUTF8StringToUTF16CharMatrix(column->name()); +mxArray* ReadVariableName(const std::string& column_name) { + return matlab::util::ConvertUTF8StringToUTF16CharMatrix(column_name); } template -mxArray* ReadNumericVariableData(const std::shared_ptr& column) { +mxArray* ReadNumericVariableData(const std::shared_ptr& column) { using MatlabType = typename MatlabTraits::MatlabType; using ArrowArrayType = typename TypeTraits::ArrayType; - std::shared_ptr chunked_array = column->data(); - const int32_t num_chunks = chunked_array->num_chunks(); - const mxClassID matlab_class_id = MatlabTraits::matlab_class_id; // Allocate a numeric mxArray* with the correct mxClassID based on the type of the - // arrow::Column. + // arrow::Array. mxArray* variable_data = mxCreateNumericMatrix(column->length(), 1, matlab_class_id, mxREAL); - int64_t mx_array_offset = 0; - // Iterate over each arrow::Array in the arrow::ChunkedArray. - for (int32_t i = 0; i < num_chunks; ++i) { - std::shared_ptr array = chunked_array->chunk(i); - const int64_t chunk_length = array->length(); - std::shared_ptr integer_array = std::static_pointer_cast(array); - - // Get a raw pointer to the Arrow array data. - const MatlabType* source = integer_array->raw_values(); - - // Get a mutable pointer to the MATLAB array data and std::copy the - // Arrow array data into it. - MatlabType* destination = MatlabTraits::GetData(variable_data); - std::copy(source, source + chunk_length, destination + mx_array_offset); - mx_array_offset += chunk_length; - } + std::shared_ptr integer_array = + std::static_pointer_cast(column); + + // Get a raw pointer to the Arrow array data. + const MatlabType* source = integer_array->raw_values(); + + // Get a mutable pointer to the MATLAB array data and std::copy the + // Arrow array data into it. 
+ MatlabType* destination = MatlabTraits::GetData(variable_data); + std::copy(source, source + column->length(), destination); return variable_data; } // Read the data of variable i from the Feather file as a mxArray*. -mxArray* ReadVariableData(const std::shared_ptr& column) { +mxArray* ReadVariableData(const std::shared_ptr& column, + const std::string& column_name) { std::shared_ptr type = column->type(); switch (type->id()) { @@ -103,7 +95,7 @@ mxArray* ReadVariableData(const std::shared_ptr& column) { default: { mexErrMsgIdAndTxt("MATLAB:arrow:UnsupportedArrowType", "Unsupported arrow::Type '%s' for variable '%s'", - type->name().c_str(), column->name().c_str()); + type->name().c_str(), column_name.c_str()); break; } } @@ -125,22 +117,22 @@ void BitUnpackBuffer(const std::shared_ptr& source, int64_t length, arrow::internal::VisitBitsUnrolled(source_data, start_offset, length, visitFcn); } -// Populates the validity bitmap from an arrow::Array or an arrow::Column, +// Populates the validity bitmap from an arrow::Array. // writes to a zero-initialized destination buffer. // Implements a fast path for the fully-valid and fully-invalid cases. // Returns true if the destination buffer was successfully populated. -template -bool TryBitUnpackFastPath(const std::shared_ptr& array, mxLogical* destination) { +bool TryBitUnpackFastPath(const std::shared_ptr& array, + mxLogical* destination) { const int64_t null_count = array->null_count(); const int64_t length = array->length(); if (null_count == length) { - // The source array/column is filled with invalid values. Since mxCreateLogicalMatrix + // The source array is filled with invalid values. Since mxCreateLogicalMatrix // zero-initializes the destination buffer, we can return without changing anything // in the destination buffer. return true; } else if (null_count == 0) { - // The source array/column contains only valid values. Fill the destination buffer + // The source array contains only valid values. Fill the destination buffer // with 'true'. std::fill(destination, destination + length, true); return true; @@ -152,7 +144,7 @@ bool TryBitUnpackFastPath(const std::shared_ptr& array, mxLogical* de // Read the validity (null) bitmap of variable i from the Feather // file as an mxArray*. -mxArray* ReadVariableValidityBitmap(const std::shared_ptr& column) { +mxArray* ReadVariableValidityBitmap(const std::shared_ptr& column) { // Allocate an mxLogical array to store the validity (null) bitmap values. // Note: All Arrow arrays can have an associated validity (null) bitmap. // The Apache Arrow specification defines 0 (false) to represent an @@ -161,38 +153,17 @@ mxArray* ReadVariableValidityBitmap(const std::shared_ptr& column) { mxArray* validity_bitmap = mxCreateLogicalMatrix(column->length(), 1); mxLogical* validity_bitmap_unpacked = mxGetLogicals(validity_bitmap); - // The Apache Arrow specification allows validity (null) bitmaps - // to be unallocated if there are no null values. In this case, - // we simply return a logical array filled with the value true. - if (TryBitUnpackFastPath(column, validity_bitmap_unpacked)) { - // Return early since the validity bitmap was already filled. - return validity_bitmap; - } - - std::shared_ptr chunked_array = column->data(); - const int32_t num_chunks = chunked_array->num_chunks(); - - int64_t mx_array_offset = 0; - // Iterate over each arrow::Array in the arrow::ChunkedArray. 
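(Editorial aside: `TryBitUnpackFastPath` above short-circuits the two common validity cases, fully valid and fully null, before any bit-unpacking. A rough Python rendering of that logic for orientation; it is not part of this patch, and assumes Arrow's LSB-first validity bitmaps.)

```python
# Sketch only: mirrors the fast paths in TryBitUnpackFastPath above.
def unpack_validity(null_bitmap, length, null_count):
    if null_count == length:
        # Fully null: the destination stays zero-initialized (all False).
        return [False] * length
    if null_count == 0:
        # Fully valid: the bitmap may not even be allocated.
        return [True] * length
    # Slow path, as in BitUnpackBuffer: bitmaps are LSB-first.
    return [bool((null_bitmap[i // 8] >> (i % 8)) & 1) for i in range(length)]
```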
- for (int32_t chunk_index = 0; chunk_index < num_chunks; ++chunk_index) { - std::shared_ptr array = chunked_array->chunk(chunk_index); - const int64_t array_length = array->length(); - - if (!TryBitUnpackFastPath(array, validity_bitmap_unpacked + mx_array_offset)) { - // Couldn't fill the full validity bitmap at once. Call an optimized loop-unrolled - // implementation instead that goes byte-by-byte and populates the validity bitmap. - BitUnpackBuffer(array->null_bitmap(), array_length, - validity_bitmap_unpacked + mx_array_offset); - } - - mx_array_offset += array_length; + if (!TryBitUnpackFastPath(column, validity_bitmap_unpacked)) { + // Couldn't fill the full validity bitmap at once. Call an optimized loop-unrolled + // implementation instead that goes byte-by-byte and populates the validity bitmap. + BitUnpackBuffer(column->null_bitmap(), column->length(), validity_bitmap_unpacked); } return validity_bitmap; } -// Read the type name of an Arrow column as an mxChar array. -mxArray* ReadVariableType(const std::shared_ptr& column) { +// Read the type name of an arrow::Array as an mxChar array. +mxArray* ReadVariableType(const std::shared_ptr& column) { return util::ConvertUTF8StringToUTF16CharMatrix(column->type()->name()); } @@ -204,18 +175,18 @@ static constexpr uint64_t MAX_MATLAB_SIZE = static_cast(0x01) << 48; Status FeatherReader::Open(const std::string& filename, std::shared_ptr* feather_reader) { *feather_reader = std::shared_ptr(new FeatherReader()); - + // Open file with given filename as a ReadableFile. std::shared_ptr readable_file(nullptr); - + RETURN_NOT_OK(io::ReadableFile::Open(filename, &readable_file)); - + // TableReader expects a RandomAccessFile. std::shared_ptr random_access_file(readable_file); // Open the Feather file for reading with a TableReader. - RETURN_NOT_OK(ipc::feather::TableReader::Open( - random_access_file, &(*feather_reader)->table_reader_)); + RETURN_NOT_OK(ipc::feather::TableReader::Open(random_access_file, + &(*feather_reader)->table_reader_)); // Read the table metadata from the Feather file. (*feather_reader)->num_rows_ = (*feather_reader)->table_reader_->num_rows(); @@ -273,14 +244,20 @@ mxArray* FeatherReader::ReadVariables() const { // Read all the table variables in the Feather file into memory. 
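(For orientation before the loop below: `GetColumn` now returns an `arrow::ChunkedArray`, and the reader accepts only the single-chunk case. The same contract, sketched in pyarrow terms; illustrative only.)

```python
# Illustrative pyarrow equivalent of the single-chunk check added below.
import pyarrow as pa

col = pa.chunked_array([[1, 2, 3]])  # a one-chunk column
if col.num_chunks != 1:
    raise NotImplementedError("Chunked columns not yet supported")
chunk = col.chunk(0)  # the plain pa.Array the reader actually converts
```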
for (int64_t i = 0; i < num_variables_; ++i) { - std::shared_ptr column(nullptr); + std::shared_ptr column; util::HandleStatus(table_reader_->GetColumn(i, &column)); + if (column->num_chunks() != 1) { + mexErrMsgIdAndTxt("MATLAB:arrow:FeatherReader::ReadVariables", + "Chunked columns not yet supported"); + } + std::shared_ptr chunk = column->chunk(0); + const std::string column_name = table_reader_->GetColumnName(i); // set the struct fields data - mxSetField(variables, i, "Name", internal::ReadVariableName(column)); - mxSetField(variables, i, "Type", internal::ReadVariableType(column)); - mxSetField(variables, i, "Data", internal::ReadVariableData(column)); - mxSetField(variables, i, "Valid", internal::ReadVariableValidityBitmap(column)); + mxSetField(variables, i, "Name", internal::ReadVariableName(column_name)); + mxSetField(variables, i, "Type", internal::ReadVariableType(chunk)); + mxSetField(variables, i, "Data", internal::ReadVariableData(chunk, column_name)); + mxSetField(variables, i, "Valid", internal::ReadVariableValidityBitmap(chunk)); } return variables; diff --git a/matlab/src/util/handle_status.cc b/matlab/src/util/handle_status.cc index 992f2c31d37..f1c3b7f2598 100644 --- a/matlab/src/util/handle_status.cc +++ b/matlab/src/util/handle_status.cc @@ -79,31 +79,6 @@ void HandleStatus(const Status& status) { status.ToString().c_str()); break; } - case StatusCode::PythonError: { - mexErrMsgIdAndTxt("MATLAB:arrow:status:PythonError", arrow_error_message, - status.ToString().c_str()); - break; - } - case StatusCode::PlasmaObjectExists: { - mexErrMsgIdAndTxt("MATLAB:arrow:status:PlasmaObjectExists", arrow_error_message, - status.ToString().c_str()); - break; - } - case StatusCode::PlasmaObjectNonexistent: { - mexErrMsgIdAndTxt("MATLAB:arrow:status:PlasmaObjectNonexistent", - arrow_error_message, status.ToString().c_str()); - break; - } - case StatusCode::PlasmaStoreFull: { - mexErrMsgIdAndTxt("MATLAB:arrow:status:PlasmaStoreFull", arrow_error_message, - status.ToString().c_str()); - break; - } - case StatusCode::PlasmaObjectAlreadySealed: { - mexErrMsgIdAndTxt("MATLAB:arrow:status:PlasmaObjectAlreadySealed", - arrow_error_message, status.ToString().c_str()); - break; - } default: { mexErrMsgIdAndTxt("MATLAB:arrow:status:UnknownStatus", arrow_error_message, "Unknown status"); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6b9996b4d89..87d26d319dd 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -271,17 +271,12 @@ function(bundle_boost_lib library_path) endif() endfunction() -# We can assume that manylinux1 and macosx have system zlib. -# See https://mail.python.org/mm3/archives/list/distutils-sig@python.org/thread/ZZG6GL3XTBLBJXSITYHEXMFKN43EREB7/ -# for manylinux1. 
-function(bundle_arrow_dependency library_name) +function(bundle_arrow_dependency library_name shared_lib_name) if(MSVC) - set(SHARED_LIB_NAME "${library_name}.dll") if(DEFINED ENV{CONDA_PREFIX}) set(SHARED_LIB_HOME "$ENV{CONDA_PREFIX}\\Library") endif() else() - set(SHARED_LIB_NAME "lib${library_name}.so") if(DEFINED ENV{CONDA_PREFIX}) set(SHARED_LIB_HOME "$ENV{CONDA_PREFIX}") endif() @@ -291,17 +286,22 @@ function(bundle_arrow_dependency library_name) endif() unset(SHARED_LIB_PATH CACHE) if("${SHARED_LIB_HOME}" STREQUAL "") - find_library(SHARED_LIB_PATH NAMES ${SHARED_LIB_NAME}) + find_library(SHARED_LIB_PATH NAMES ${shared_lib_name}) else() find_library(SHARED_LIB_PATH - NAMES ${SHARED_LIB_NAME} + NAMES ${shared_lib_name} PATHS ${SHARED_LIB_HOME} NO_DEFAULT_PATH PATH_SUFFIXES "bin") endif() if(SHARED_LIB_PATH) - message(STATUS "Bundle dependency ${library_name}: ${SHARED_LIB_PATH}") - file(COPY ${SHARED_LIB_PATH} DESTINATION ${BUILD_OUTPUT_ROOT_DIRECTORY}) + get_filename_component(SHARED_LIB_REALPATH ${SHARED_LIB_PATH} REALPATH) + get_filename_component(SHARED_LIB_NAME ${SHARED_LIB_PATH} NAME) + message( + STATUS + "Bundle dependency ${library_name}: ${SHARED_LIB_REALPATH} as ${SHARED_LIB_NAME}") + configure_file(${SHARED_LIB_REALPATH} + ${BUILD_OUTPUT_ROOT_DIRECTORY}/${SHARED_LIB_NAME} COPYONLY) else() message(FATAL_ERROR "Unable to bundle dependency: ${library_name}") endif() @@ -360,7 +360,6 @@ if(PYARROW_BUNDLE_ARROW_CPP) if(MSVC) bundle_arrow_implib(ARROW_SHARED_IMP_LIB) bundle_arrow_implib(ARROW_PYTHON_SHARED_IMP_LIB) - bundle_arrow_dependency(zlib) endif() endif() @@ -504,13 +503,13 @@ if(PYARROW_BUILD_FLIGHT) ${ARROW_SO_VERSION}) if(MSVC) bundle_arrow_implib(ARROW_FLIGHT_SHARED_IMP_LIB) - bundle_arrow_dependency(cares) - bundle_arrow_dependency(libprotobuf) + bundle_arrow_dependency(cares cares.dll) + bundle_arrow_dependency(libprotobuf libprotobuf.dll) # XXX Hardcoded library names because CMake is too stupid to give us # the shared library paths. # https://gitlab.kitware.com/cmake/cmake/issues/16210 - bundle_arrow_dependency(libcrypto-1_1-x64) - bundle_arrow_dependency(libssl-1_1-x64) + bundle_arrow_dependency(libcrypto-1_1-x64 libcrypto-1_1-x64.dll) + bundle_arrow_dependency(libssl-1_1-x64 libssl-1_1-x64.dll) endif() endif() if(MSVC) diff --git a/python/LICENSE.txt b/python/LICENSE.txt deleted file mode 100644 index 078e144ded1..00000000000 --- a/python/LICENSE.txt +++ /dev/null @@ -1,88 +0,0 @@ -## 3rd-party licenses for code that has been adapted for the Arrow Python - library - -------------------------------------------------------------------------------- -Some code from pandas has been adapted for this codebase. pandas is available -under the 3-clause BSD license, which follows: - -pandas license -============== - -Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team -All rights reserved. - -Copyright (c) 2008-2011 AQR Capital Management, LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. 
- - * Neither the name of the copyright holder nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -------------------------------------------------------------------------------- - -Some bits from DyND, in particular aspects of the build system, have been -adapted from libdynd and dynd-python under the terms of the BSD 2-clause -license - -The BSD 2-Clause License - - Copyright (C) 2011-12, Dynamic NDArray Developers - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Dynamic NDArray Developers list: - - * Mark Wiebe - * Continuum Analytics - -------------------------------------------------------------------------------- - -Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted -for Arrow. Ibis is released under the Apache License, Version 2.0. 
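(Editorial aside before the next hunks: the Flight example and `_flight.pyx` changes below split `FlightServerBase.run(location, ...)` into `init(location, ...)` plus an argument-less, blocking `run()`. A minimal usage sketch of the new two-step API; the subclass name and port are hypothetical.)

```python
import pyarrow.flight as flight

class EchoServer(flight.FlightServerBase):  # hypothetical subclass
    def list_flights(self, context, criteria):
        return iter([])  # advertise nothing in this sketch

server = EchoServer()
server.init("grpc+tcp://0.0.0.0:5005")  # bind and configure first
server.run()                            # then block until shutdown()/signal
```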
diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 1ae612fe1ce..ed7012e4b70 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -1,5 +1,6 @@
 include README.md
-include LICENSE.txt
+include ../LICENSE.txt
+include ../NOTICE.txt
 global-include CMakeLists.txt

 graft pyarrow
diff --git a/python/examples/flight/server.py b/python/examples/flight/server.py
index 3b699723df4..f8a3563d8c9 100644
--- a/python/examples/flight/server.py
+++ b/python/examples/flight/server.py
@@ -122,8 +122,9 @@ def main():
             kwargs["tls_private_key"] = key_file.read()

     location = "{}://0.0.0.0:{}".format(scheme, args.port)
+    server.init(location, **kwargs)
     print("Serving on", location)
-    server.run(location, **kwargs)
+    server.run()


 if __name__ == '__main__':
diff --git a/python/manylinux1/README.md b/python/manylinux1/README.md
index 948de2082c0..731dbed47dd 100644
--- a/python/manylinux1/README.md
+++ b/python/manylinux1/README.md
@@ -38,7 +38,7 @@ use `PYTHON_VERSION="2.7"` with `UNICODE_WIDTH=32`):
 ```bash
 # Build the python packages
-docker run --env PYTHON_VERSION="2.7" --env UNICODE_WIDTH=16 --shm-size=2g --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow quay.io/ursa-labs/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh
+docker-compose run -e PYTHON_VERSION="2.7" -e UNICODE_WIDTH=16 python-manylinux1
 # Now the new packages are located in the dist/ folder
 ls -l dist/
 ```
@@ -49,7 +49,7 @@ a dependency to a new version, we also need to adjust it. You can rebuild
 this image using

 ```bash
-docker build -t arrow_manylinux1_x86_64_base -f Dockerfile-x86_64_base .
+docker-compose build python-manylinux1
 ```

 For each dependency, we have a bash script in the directory `scripts/` that
@@ -59,29 +59,46 @@ dependency is persisted in the docker image.
 When you do local adjustments to this image, you need to change the name of
 the docker image in the `docker run` command.

+### Publishing a new build image
+
+If you have write access to the Docker Hub Ursa Labs account, you can directly
+publish a build image that you built locally.
+
+```bash
+$ docker-compose push python-manylinux1
+```
+
 ### Using quay.io to trigger and build the docker image

-1. Make the change in the build scripts (eg. to modify the boost build, update `scripts/boost.sh`).
+The images used by the docker-compose setup can be freely changed; currently
+they are hosted on Docker Hub.
+
+1. Make the change in the build scripts (eg. to modify the boost build, update
+   `scripts/boost.sh`).

 2. Setup an account on quay.io and link to your GitHub account

 3. In quay.io, Add a new repository using :

    1. Link to GitHub repository push
-   2. Trigger build on changes to a specific branch (eg. myquay) of the repo (eg. `pravindra/arrow`)
+   2. Trigger build on changes to a specific branch (eg. myquay) of the repo
+      (eg. `pravindra/arrow`)
    3. Set Dockerfile location to `/python/manylinux1/Dockerfile-x86_64_base`
    4. Set Context location to `/python/manylinux1`

 4. Push change (in step 1) to the branch specified in step 3.ii

-   * This should trigger a build in quay.io, the build takes about 2 hrs to finish.
+   * This should trigger a build in quay.io; the build takes about 2 hrs to
+     finish.

-5. Add a tag `latest` to the build after step 4 finishes, save the build ID (eg. `quay.io/pravindra/arrow_manylinux1_x86_64_base:latest`)
+5. Add a tag `latest` to the build after step 4 finishes, save the build ID
+   (eg. `quay.io/pravindra/arrow_manylinux1_x86_64_base:latest`)

 6. In your arrow PR,

    * include the change from 1.
-   * modify `travis_script_manylinux.sh` to switch to the location from step 5 for the docker image.
+   * modify the `python-manylinux1` entry in `docker-compose.yml` to switch
+     to the location from step 5 for the docker image.

 ## TensorFlow compatible wheels for Arrow
diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh
index ca5ab7c643d..9128b73280f 100755
--- a/python/manylinux1/build_arrow.sh
+++ b/python/manylinux1/build_arrow.sh
@@ -79,7 +79,6 @@ mkdir -p "${ARROW_BUILD_DIR}"
 pushd "${ARROW_BUILD_DIR}"

 cmake -DCMAKE_BUILD_TYPE=Release \
     -DARROW_DEPENDENCY_SOURCE="SYSTEM" \
-    -DZLIB_ROOT=/usr/local \
     -DCMAKE_INSTALL_PREFIX=/arrow-dist \
     -DCMAKE_INSTALL_LIBDIR=lib \
     -DARROW_BUILD_TESTS=OFF \
@@ -103,6 +102,7 @@ cmake -DCMAKE_BUILD_TYPE=Release \
     -DOPENSSL_USE_STATIC_LIBS=ON \
     -DORC_SOURCE=BUNDLED \
     -GNinja /arrow/cpp
+ninja
 ninja install

 popd
diff --git a/python/manylinux1/scripts/build_zlib.sh b/python/manylinux1/scripts/build_zlib.sh
index 272b6c4b2b0..71968c1a95d 100755
--- a/python/manylinux1/scripts/build_zlib.sh
+++ b/python/manylinux1/scripts/build_zlib.sh
@@ -19,7 +19,7 @@
 curl -sL https://zlib.net/zlib-1.2.11.tar.gz -o /zlib-1.2.11.tar.gz
 tar xf zlib-1.2.11.tar.gz
 pushd zlib-1.2.11
-./configure
+CFLAGS=-fPIC ./configure --static
 make -j8
 make install
 popd
diff --git a/python/manylinux2010/README.md b/python/manylinux2010/README.md
index db0e4147c4d..fe2888e89df 100644
--- a/python/manylinux2010/README.md
+++ b/python/manylinux2010/README.md
@@ -27,7 +27,7 @@ The process is split up in two parts:
 1. There are base Docker images that contain the build dependencies for
    Arrow. Those images do not need to be rebuilt frequently, and are hosted
-   on the public quay.io service.
+   on the public Docker Hub service.

 2. Based on on these images, there is a bash script (`build_arrow.sh`) that
    the PyArrow wheels for all supported Python versions, and place them
@@ -42,7 +42,7 @@ use `PYTHON_VERSION="2.7"` with `UNICODE_WIDTH=32`):
 ```bash
 # Build the python packages
-docker run --env PYTHON_VERSION="2.7" --env UNICODE_WIDTH=16 --shm-size=2g --rm -t -i -v $PWD:/io -v $PWD/../../:/arrow quay.io/ursa-labs/arrow_manylinux2010_x86_64_base:latest /io/build_arrow.sh
+docker-compose run -e PYTHON_VERSION="2.7" -e UNICODE_WIDTH=16 python-manylinux2010
 # Now the new packages are located in the dist/ folder
 ls -l dist/
 ```
@@ -55,7 +55,7 @@ The Docker configuration is in `Dockerfile-x86_64_base`, and it calls into
 scripts stored under the `scripts` directory.

 ```bash
-docker build -t arrow_manylinux2010_x86_64_base -f Dockerfile-x86_64_base .
+docker-compose build python-manylinux2010
 ```

 For each dependency, a bash script in the `scripts/` directory downloads the
@@ -65,46 +65,12 @@ installation of a dependency is persisted in the Docker image.

 ### Publishing a new build image

-If you have write access to the `quay.io` Ursa Labs account, you can directly
+If you have write access to the Docker Hub Ursa Labs account, you can directly
 publish a build image that you built locally.
-For that you need to first tag your image for quay.io upload:
 ```bash
-$ docker image tag arrow_manylinux2010_x86_64_base:latest quay.io/ursa-labs/arrow_manylinux2010_x86_64_base
-```
-
-Then you can push it:
-```bash
-$ docker image push quay.io/ursa-labs/arrow_manylinux2010_x86_64_base
-The push refers to repository [quay.io/ursa-labs/arrow_manylinux2010_x86_64_base]
+$ docker-compose push python-manylinux2010
+The push refers to repository [ursalab/arrow_manylinux2010_x86_64_base]
 a1ab88d27acc: Pushing [==============>  ]  492.5MB/1.645GB
 [... etc. ...]
 ```
-
-### Using quay.io to trigger and build the docker image
-
-You can also create your own `quay.io` repository and trigger builds there from
-your Github fork of the Arrow repository.
-
-1. Make the change in the build scripts (eg. to modify the boost build, update `scripts/boost.sh`).
-
-2. Setup an account on quay.io and link to your GitHub account
-
-3. In quay.io, Add a new repository using :
-
-   1. Link to GitHub repository push
-   2. Trigger build on changes to a specific branch (eg. myquay) of the repo (eg. `pravindra/arrow`)
-   3. Set Dockerfile location to `/python/manylinux2010/Dockerfile-x86_64_base`
-   4. Set Context location to `/python/manylinux2010`
-
-4. Push change (in step 1) to the branch specified in step 3.ii
-
-   * This should trigger a build in quay.io, the build takes about 2 hrs to finish.
-
-5. Add a tag `latest` to the build after step 4 finishes, save the build ID (eg. `quay.io/pravindra/arrow_manylinux2010_x86_64_base:latest`)
-
-6. In your arrow PR,
-
-   * include the change from 1.
-   * modify the `python-manylinux2010` entry in `docker-compose.yml`
-     to switch to the location from step 5 for the docker image.
diff --git a/python/manylinux2010/scripts/build_lz4.sh b/python/manylinux2010/scripts/build_lz4.sh
index 7b3d4416dba..6d5d34c4e74 100755
--- a/python/manylinux2010/scripts/build_lz4.sh
+++ b/python/manylinux2010/scripts/build_lz4.sh
@@ -30,4 +30,4 @@ make install PREFIX=${PREFIX}
 popd
 rm -rf lz4-${LZ4_VERSION}.tar.gz lz4-${LZ4_VERSION}
 # We don't want to link against shared libs
-rm -rf /usr/lib/liblz4.so*
+rm -rf ${PREFIX}/lib/liblz4.so*
diff --git a/python/manylinux2010/scripts/build_zlib.sh b/python/manylinux2010/scripts/build_zlib.sh
index 272b6c4b2b0..71968c1a95d 100755
--- a/python/manylinux2010/scripts/build_zlib.sh
+++ b/python/manylinux2010/scripts/build_zlib.sh
@@ -19,7 +19,7 @@
 curl -sL https://zlib.net/zlib-1.2.11.tar.gz -o /zlib-1.2.11.tar.gz
 tar xf zlib-1.2.11.tar.gz
 pushd zlib-1.2.11
-./configure
+CFLAGS=-fPIC ./configure --static
 make -j8
 make install
 popd
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
index 95cea5ca4fc..432880556cc 100644
--- a/python/pyarrow/__init__.pxd
+++ b/python/pyarrow/__init__.pxd
@@ -20,8 +20,9 @@ from __future__ import absolute_import
 from libcpp.memory cimport shared_ptr
 from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, CDataType,
                                         CField, CRecordBatch, CSchema,
-                                        CTable, CTensor)
-
+                                        CTable, CTensor,
+                                        CSparseTensorCSR, CSparseTensorCOO)
+from pyarrow.compat import frombytes

 cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
     cdef int import_pyarrow() except -1
@@ -31,6 +32,10 @@ cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
     cdef object wrap_schema(const shared_ptr[CSchema]& schema)
     cdef object wrap_array(const shared_ptr[CArray]& sp_array)
     cdef object wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
+    cdef object wrap_sparse_tensor_coo(
+        const shared_ptr[CSparseTensorCOO]& sp_sparse_tensor)
+
cdef object wrap_sparse_tensor_csr( + const shared_ptr[CSparseTensorCSR]& sp_sparse_tensor) cdef object wrap_column(const shared_ptr[CColumn]& ccolumn) cdef object wrap_table(const shared_ptr[CTable]& ctable) cdef object wrap_batch(const shared_ptr[CRecordBatch]& cbatch) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 487065c2892..bc49e1733ca 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -65,7 +65,8 @@ def parse_git(root, **kwargs): Schema, schema, Array, Tensor, - array, chunked_array, column, table, + array, chunked_array, table, + SparseTensorCSR, SparseTensorCOO, infer_type, from_numpy_dtype, NullArray, NumericArray, IntegerArray, FloatingPointArray, @@ -110,7 +111,7 @@ def parse_git(root, **kwargs): create_memory_map, have_libhdfs, have_libhdfs3, MockOutputStream, input_stream, output_stream) -from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table, +from pyarrow.lib import (ChunkedArray, RecordBatch, Table, concat_arrays, concat_tables) # Exceptions @@ -121,8 +122,7 @@ def parse_git(root, **kwargs): ArrowMemoryError, ArrowNotImplementedError, ArrowTypeError, - ArrowSerializationError, - PlasmaObjectExists) + ArrowSerializationError) # Serialization from pyarrow.lib import (deserialize_from, deserialize, @@ -209,6 +209,31 @@ def get_include(): return _os.path.join(_os.path.dirname(__file__), 'include') +def _get_pkg_config_executable(): + return _os.environ.get('PKG_CONFIG', 'pkg-config') + + +def _has_pkg_config(pkgname): + import subprocess + try: + return subprocess.call([_get_pkg_config_executable(), + '--exists', pkgname]) == 0 + except OSError: + # TODO: replace with FileNotFoundError once we ditch 2.7 + return False + + +def _read_pkg_config_variable(pkgname, cli_args): + import subprocess + cmd = [_get_pkg_config_executable(), pkgname] + cli_args + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + out, err = proc.communicate() + if proc.returncode != 0: + raise RuntimeError("pkg-config failed: " + err.decode('utf8')) + return out.rstrip().decode('utf8') + + def get_libraries(): """ Return list of library names to include in the `libraries` argument for C @@ -223,38 +248,37 @@ def get_library_dirs(): linking C or Cython extensions using pyarrow """ package_cwd = _os.path.dirname(__file__) - library_dirs = [package_cwd] + def append_library_dir(library_dir): + if library_dir not in library_dirs: + library_dirs.append(library_dir) + # Search library paths via pkg-config. This is necessary if the user # installed libarrow and the other shared libraries manually and they # are not shipped inside the pyarrow package (see also ARROW-2976). 
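(Aside on the helpers just added: third-party `setup.py` files are the intended consumers of `get_libraries()`/`get_library_dirs()` when linking Cython or C++ extensions against libarrow (see ARROW-2976). A hedged sketch; the extension name and source file are hypothetical.)

```python
from setuptools import Extension

import pyarrow as pa

ext = Extension(
    "myext",                             # hypothetical extension module
    sources=["myext.cpp"],
    include_dirs=[pa.get_include()],
    libraries=pa.get_libraries(),        # e.g. ["arrow", "arrow_python"]
    library_dirs=pa.get_library_dirs(),  # pkg-config hits plus the wheel dir
)
```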
-    from subprocess import call, PIPE, Popen
-    pkg_config_executable = _os.environ.get('PKG_CONFIG', None) or 'pkg-config'
-    for package in ["arrow", "plasma", "arrow_python"]:
-        cmd = '{0} --exists {1}'.format(pkg_config_executable, package).split()
-        try:
-            if call(cmd) == 0:
-                cmd = [pkg_config_executable, "--libs-only-L", package]
-                proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
-                out, err = proc.communicate()
-                library_dir = out.rstrip().decode('utf-8')[2:]  # strip "-L"
-                if library_dir not in library_dirs:
-                    library_dirs.append(library_dir)
-        except FileNotFoundError:
-            pass
+    pkg_config_executable = _os.environ.get('PKG_CONFIG') or 'pkg-config'
+    for pkgname in ["arrow", "arrow_python"]:
+        if _has_pkg_config(pkgname):
+            library_dir = _read_pkg_config_variable(pkgname,
+                                                    ["--libs-only-L"])
+            assert library_dir.startswith("-L")
+            append_library_dir(library_dir[2:])

     if _sys.platform == 'win32':
         # TODO(wesm): Is this necessary, or does setuptools within a conda
         # installation add Library\lib to the linker path for MSVC?
         python_base_install = _os.path.dirname(_sys.executable)
-        library_lib = _os.path.join(python_base_install, 'Library', 'lib')
+        library_dir = _os.path.join(python_base_install, 'Library', 'lib')

-        if _os.path.exists(_os.path.join(library_lib, 'arrow.lib')):
-            library_dirs.append(library_lib)
+        if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
+            append_library_dir(library_dir)

     # ARROW-4074: Allow for ARROW_HOME to be set to some other directory
-    if 'ARROW_HOME' in _os.environ:
-        library_dirs.append(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
+    if _os.environ.get('ARROW_HOME'):
+        append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
+    else:
+        # Python wheels bundle the Arrow libraries in the pyarrow directory.
+        append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

     return library_dirs
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index a0d08f83f59..5bfc88a4258 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -1305,8 +1305,8 @@ cdef class FlightServerBase:
     cdef:
         unique_ptr[PyFlightServer] server

-    def run(self, location, auth_handler=None, tls_certificates=None):
-        """Start this server.
+    def init(self, location, auth_handler=None, tls_certificates=None):
+        """Initialize this server.

         Parameters
         ----------
@@ -1348,7 +1348,18 @@ cdef class FlightServerBase:
         self.server.reset(c_server)
         with nogil:
             check_status(c_server.Init(deref(c_options)))
-            check_status(c_server.ServeWithSignals())
+
+    def run(self):
+        """
+        Start serving. This method only returns if shutdown() is called
+        or a signal is received.
+
+        You must have called init() first.
+        """
+        if self.server.get() == nullptr:
+            raise ValueError("run() on uninitialized FlightServerBase")
+        with nogil:
+            check_status(self.server.get().ServeWithSignals())

     def list_flights(self, context, criteria):
         raise NotImplementedError
@@ -1381,6 +1392,7 @@ cdef class FlightServerBase:
         # complete. Holding the GIL means Python-implemented Flight
         # methods will never get to run, so this will hang
         # indefinitely.
+ if self.server.get() == nullptr: + raise ValueError("shutdown() on uninitialized FlightServerBase") with nogil: - if self.server.get() != NULL: - self.server.get().Shutdown() + check_status(self.server.get().Shutdown()) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index d9861f289b1..eb74dea852b 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -484,6 +484,8 @@ cdef class RowGroupMetaData: return True def column(self, int i): + if i < 0 or i >= self.num_columns: + raise IndexError('{0} out of bounds'.format(i)) chunk = ColumnChunkMetaData() chunk.init(deref(self.metadata), i) return chunk diff --git a/python/pyarrow/_plasma.pyx b/python/pyarrow/_plasma.pyx index e352377f14e..7e994c3ee07 100644 --- a/python/pyarrow/_plasma.pyx +++ b/python/pyarrow/_plasma.pyx @@ -37,8 +37,10 @@ import warnings import pyarrow from pyarrow.lib cimport Buffer, NativeFile, check_status, pyarrow_wrap_buffer +from pyarrow.lib import ArrowException from pyarrow.includes.libarrow cimport (CBuffer, CMutableBuffer, CFixedSizeBufferWriter, CStatus) +from pyarrow.includes.libplasma cimport * from pyarrow import compat @@ -255,6 +257,34 @@ cdef class PlasmaBuffer(Buffer): self.client._release(self.object_id) +class PlasmaObjectNonexistent(ArrowException): + pass + + +class PlasmaStoreFull(ArrowException): + pass + + +class PlasmaObjectExists(ArrowException): + pass + + +cdef int plasma_check_status(const CStatus& status) nogil except -1: + if status.ok(): + return 0 + + with gil: + message = compat.frombytes(status.message()) + if IsPlasmaObjectExists(status): + raise PlasmaObjectExists(message) + elif IsPlasmaObjectNonexistent(status): + raise PlasmaObjectNonexistent(message) + elif IsPlasmaStoreFull(status): + raise PlasmaStoreFull(message) + + return check_status(status) + + cdef class PlasmaClient: """ The PlasmaClient is used to interface with a plasma store and manager. @@ -283,7 +313,7 @@ cdef class PlasmaClient: for object_id in object_ids: ids.push_back(object_id.data) with nogil: - check_status(self.client.get().Get(ids, timeout_ms, result)) + plasma_check_status(self.client.get().Get(ids, timeout_ms, result)) # XXX C++ API should instead expose some kind of CreateAuto() cdef _make_mutable_plasma_buffer(self, ObjectID object_id, uint8_t* data, @@ -325,9 +355,10 @@ cdef class PlasmaClient: """ cdef shared_ptr[CBuffer] data with nogil: - check_status(self.client.get().Create(object_id.data, data_size, - (metadata.data()), - metadata.size(), &data)) + plasma_check_status( + self.client.get().Create(object_id.data, data_size, + (metadata.data()), + metadata.size(), &data)) return self._make_mutable_plasma_buffer(object_id, data.get().mutable_data(), data_size) @@ -358,8 +389,9 @@ cdef class PlasmaClient: enough objects to create room for it. """ with nogil: - check_status(self.client.get().CreateAndSeal(object_id.data, data, - metadata)) + plasma_check_status( + self.client.get().CreateAndSeal(object_id.data, data, + metadata)) def get_buffers(self, object_ids, timeout_ms=-1, with_meta=False): """ @@ -554,7 +586,7 @@ cdef class PlasmaClient: A string used to identify an object. """ with nogil: - check_status(self.client.get().Seal(object_id.data)) + plasma_check_status(self.client.get().Seal(object_id.data)) def _release(self, ObjectID object_id): """ @@ -566,7 +598,7 @@ cdef class PlasmaClient: A string used to identify an object. 
""" with nogil: - check_status(self.client.get().Release(object_id.data)) + plasma_check_status(self.client.get().Release(object_id.data)) def contains(self, ObjectID object_id): """ @@ -579,8 +611,8 @@ cdef class PlasmaClient: """ cdef c_bool is_contained with nogil: - check_status(self.client.get().Contains(object_id.data, - &is_contained)) + plasma_check_status(self.client.get().Contains(object_id.data, + &is_contained)) return is_contained def hash(self, ObjectID object_id): @@ -600,8 +632,8 @@ cdef class PlasmaClient: """ cdef c_vector[uint8_t] digest = c_vector[uint8_t](kDigestSize) with nogil: - check_status(self.client.get().Hash(object_id.data, - digest.data())) + plasma_check_status(self.client.get().Hash(object_id.data, + digest.data())) return bytes(digest[:]) def evict(self, int64_t num_bytes): @@ -617,13 +649,15 @@ cdef class PlasmaClient: """ cdef int64_t num_bytes_evicted = -1 with nogil: - check_status(self.client.get().Evict(num_bytes, num_bytes_evicted)) + plasma_check_status( + self.client.get().Evict(num_bytes, num_bytes_evicted)) return num_bytes_evicted def subscribe(self): """Subscribe to notifications about sealed objects.""" with nogil: - check_status(self.client.get().Subscribe(&self.notification_fd)) + plasma_check_status( + self.client.get().Subscribe(&self.notification_fd)) def get_notification_socket(self): """ @@ -650,11 +684,11 @@ cdef class PlasmaClient: cdef int64_t data_size cdef int64_t metadata_size with nogil: - check_status(self.client.get() - .DecodeNotification(buf, - &object_id, - &data_size, - &metadata_size)) + status = self.client.get().DecodeNotification(buf, + &object_id, + &data_size, + &metadata_size) + plasma_check_status(status) return ObjectID(object_id.binary()), data_size, metadata_size def get_next_notification(self): @@ -674,11 +708,11 @@ cdef class PlasmaClient: cdef int64_t data_size cdef int64_t metadata_size with nogil: - check_status(self.client.get() - .GetNotification(self.notification_fd, - &object_id.data, - &data_size, - &metadata_size)) + status = self.client.get().GetNotification(self.notification_fd, + &object_id.data, + &data_size, + &metadata_size) + plasma_check_status(status) return object_id, data_size, metadata_size def to_capsule(self): @@ -689,7 +723,7 @@ cdef class PlasmaClient: Disconnect this client from the Plasma store. 
""" with nogil: - check_status(self.client.get().Disconnect()) + plasma_check_status(self.client.get().Disconnect()) def delete(self, object_ids): """ @@ -705,7 +739,7 @@ cdef class PlasmaClient: for object_id in object_ids: ids.push_back(object_id.data) with nogil: - check_status(self.client.get().Delete(ids)) + plasma_check_status(self.client.get().Delete(ids)) def list(self): """ @@ -738,7 +772,7 @@ cdef class PlasmaClient: """ cdef CObjectTable objects with nogil: - check_status(self.client.get().List(&objects)) + plasma_check_status(self.client.get().List(&objects)) result = dict() cdef ObjectID object_id cdef CObjectTableEntry entry @@ -802,7 +836,7 @@ def connect(store_socket_name, manager_socket_name=None, int release_delay=0, warnings.warn("release_delay in PlasmaClient.connect is deprecated", FutureWarning) with nogil: - check_status(result.client.get() - .Connect(result.store_socket_name, b"", - release_delay, num_retries)) + plasma_check_status( + result.client.get().Connect(result.store_socket_name, b"", + release_delay, num_retries)) return result diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 5ae178d8953..15905a18507 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -870,104 +870,6 @@ cdef class Array(_PandasConvertible): return res -cdef class Tensor: - """ - A n-dimensional array a.k.a Tensor. - """ - - def __init__(self): - raise TypeError("Do not call Tensor's constructor directly, use one " - "of the `pyarrow.Tensor.from_*` functions instead.") - - cdef void init(self, const shared_ptr[CTensor]& sp_tensor): - self.sp_tensor = sp_tensor - self.tp = sp_tensor.get() - self.type = pyarrow_wrap_data_type(self.tp.type()) - - def __repr__(self): - return """ -type: {0.type} -shape: {0.shape} -strides: {0.strides}""".format(self) - - @staticmethod - def from_numpy(obj): - cdef shared_ptr[CTensor] ctensor - with nogil: - check_status(NdarrayToTensor(c_default_memory_pool(), obj, - &ctensor)) - return pyarrow_wrap_tensor(ctensor) - - def to_numpy(self): - """ - Convert arrow::Tensor to numpy.ndarray with zero copy - """ - cdef PyObject* out - - with nogil: - check_status(TensorToNdarray(self.sp_tensor, self, &out)) - return PyObject_to_object(out) - - def equals(self, Tensor other): - """ - Return true if the tensors contains exactly equal data - """ - return self.tp.Equals(deref(other.tp)) - - def __eq__(self, other): - if isinstance(other, Tensor): - return self.equals(other) - else: - return NotImplemented - - @property - def is_mutable(self): - return self.tp.is_mutable() - - @property - def is_contiguous(self): - return self.tp.is_contiguous() - - @property - def ndim(self): - return self.tp.ndim() - - @property - def size(self): - return self.tp.size() - - @property - def shape(self): - # Cython knows how to convert a vector[T] to a Python list - return tuple(self.tp.shape()) - - @property - def strides(self): - return tuple(self.tp.strides()) - - def __getbuffer__(self, cp.Py_buffer* buffer, int flags): - buffer.buf = self.tp.data().get().data() - pep3118_format = self.type.pep3118_format - if pep3118_format is None: - raise NotImplementedError("type %s not supported for buffer " - "protocol" % (self.type,)) - buffer.format = pep3118_format - buffer.itemsize = self.type.bit_width // 8 - buffer.internal = NULL - buffer.len = self.tp.size() * buffer.itemsize - buffer.ndim = self.tp.ndim() - buffer.obj = self - if self.tp.is_mutable(): - buffer.readonly = 0 - else: - buffer.readonly = 1 - # NOTE: This assumes Py_ssize_t == 
int64_t, and that the shape - # and strides arrays lifetime is tied to the tensor's - buffer.shape = &self.tp.shape()[0] - buffer.strides = &self.tp.strides()[0] - buffer.suboffsets = NULL - - cdef wrap_array_output(PyObject* output): cdef object obj = PyObject_to_object(output) diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index e37307c744c..e9572d83080 100644 --- a/python/pyarrow/compat.py +++ b/python/pyarrow/compat.py @@ -74,6 +74,9 @@ def tobytes(o): else: return o + def u_utf8(s): + return s.decode('utf-8') + def frombytes(o): return o @@ -112,6 +115,11 @@ def tobytes(o): else: return o + def u_utf8(s): + if isinstance(s, bytes): + return frombytes(s) + return s + def frombytes(o): return o.decode('utf8') diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi index 7b5e8d43371..3cb9142d479 100644 --- a/python/pyarrow/error.pxi +++ b/python/pyarrow/error.pxi @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from pyarrow.includes.libarrow cimport CStatus +from pyarrow.includes.libarrow cimport CStatus, IsPyError, RestorePyError from pyarrow.includes.common cimport c_string from pyarrow.compat import frombytes @@ -56,30 +56,21 @@ class ArrowIndexError(IndexError, ArrowException): pass -class PlasmaObjectExists(ArrowException): - pass - - -class PlasmaObjectNonexistent(ArrowException): - pass - - -class PlasmaStoreFull(ArrowException): - pass - - class ArrowSerializationError(ArrowException): pass +# This function could be written directly in C++ if we didn't +# define Arrow-specific subclasses (ArrowInvalid etc.) cdef int check_status(const CStatus& status) nogil except -1: if status.ok(): return 0 - if status.IsPythonError(): - return -1 - with gil: + if IsPyError(status): + RestorePyError(status) + return -1 + message = frombytes(status.message()) if status.IsInvalid(): raise ArrowInvalid(message) @@ -97,12 +88,6 @@ cdef int check_status(const CStatus& status) nogil except -1: raise ArrowCapacityError(message) elif status.IsIndexError(): raise ArrowIndexError(message) - elif status.IsPlasmaObjectExists(): - raise PlasmaObjectExists(message) - elif status.IsPlasmaObjectNonexistent(): - raise PlasmaObjectNonexistent(message) - elif status.IsPlasmaStoreFull(): - raise PlasmaStoreFull(message) elif status.IsSerializationError(): raise ArrowSerializationError(message) else: diff --git a/python/pyarrow/feather.pxi b/python/pyarrow/feather.pxi index 20b12c1e35d..6fd13bc04b4 100644 --- a/python/pyarrow/feather.pxi +++ b/python/pyarrow/feather.pxi @@ -98,12 +98,12 @@ cdef class FeatherReader: if i < 0 or i >= self.num_columns: raise IndexError(i) - cdef shared_ptr[CColumn] sp_column + cdef shared_ptr[CChunkedArray] sp_chunked_array with nogil: check_status(self.reader.get() - .GetColumn(i, &sp_column)) + .GetColumn(i, &sp_chunked_array)) - return pyarrow_wrap_column(sp_column) + return pyarrow_wrap_chunked_array(sp_chunked_array) def _read(self): cdef shared_ptr[CTable] sp_table diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd index 4a06fc82065..12588fe0c50 100644 --- a/python/pyarrow/includes/common.pxd +++ b/python/pyarrow/includes/common.pxd @@ -42,6 +42,7 @@ cdef extern from "numpy/halffloat.h": cdef extern from "arrow/api.h" namespace "arrow" nogil: # We can later add more of the common status factory methods as needed cdef CStatus CStatus_OK "arrow::Status::OK"() + cdef CStatus CStatus_Invalid "arrow::Status::Invalid"() cdef CStatus CStatus_NotImplemented \ 
"arrow::Status::NotImplemented"(const c_string& msg) @@ -64,13 +65,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_bool IsCapacityError() c_bool IsIndexError() c_bool IsSerializationError() - c_bool IsPythonError() - c_bool IsPlasmaObjectExists() - c_bool IsPlasmaObjectNonexistent() - c_bool IsPlasmaStoreFull() -cdef extern from "arrow/result.h" namespace "arrow::internal" nogil: - cdef cppclass CResult[T]: +cdef extern from "arrow/result.h" namespace "arrow" nogil: + cdef cppclass CResult "arrow::Result"[T]: c_bool ok() CStatus status() T operator*() diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8798834b5fd..282572e6964 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -155,6 +155,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType): int bit_width() + cdef cppclass CNullArray" arrow::NullArray"(CArray): + CNullArray(int64_t length) + cdef cppclass CDictionaryArray" arrow::DictionaryArray"(CArray): CDictionaryArray(const shared_ptr[CDataType]& type, const shared_ptr[CArray]& indices, @@ -487,29 +490,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CChunkedArray] Slice(int64_t offset, int64_t length) const shared_ptr[CChunkedArray] Slice(int64_t offset) const - CStatus Validate() const - - cdef cppclass CColumn" arrow::Column": - CColumn(const shared_ptr[CField]& field, - const shared_ptr[CArray]& data) - - CColumn(const shared_ptr[CField]& field, - const vector[shared_ptr[CArray]]& chunks) - - CColumn(const shared_ptr[CField]& field, - const shared_ptr[CChunkedArray]& data) - - c_bool Equals(const CColumn& other) + CStatus Flatten(CMemoryPool* pool, + vector[shared_ptr[CChunkedArray]]* out) - CStatus Flatten(CMemoryPool* pool, vector[shared_ptr[CColumn]]* out) - - shared_ptr[CField] field() - - int64_t length() - int64_t null_count() - const c_string& name() - shared_ptr[CDataType] type() - shared_ptr[CChunkedArray] data() + CStatus Validate() const cdef cppclass CRecordBatch" arrow::RecordBatch": @staticmethod @@ -536,12 +520,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CTable" arrow::Table": CTable(const shared_ptr[CSchema]& schema, - const vector[shared_ptr[CColumn]]& columns) + const vector[shared_ptr[CChunkedArray]]& columns) @staticmethod shared_ptr[CTable] Make( const shared_ptr[CSchema]& schema, - const vector[shared_ptr[CColumn]]& columns) + const vector[shared_ptr[CChunkedArray]]& columns) @staticmethod CStatus FromRecordBatches( @@ -555,12 +539,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_bool Equals(const CTable& other) shared_ptr[CSchema] schema() - shared_ptr[CColumn] column(int i) + shared_ptr[CChunkedArray] column(int i) + shared_ptr[CField] field(int i) - CStatus AddColumn(int i, const shared_ptr[CColumn]& column, + CStatus AddColumn(int i, shared_ptr[CField] field, + shared_ptr[CChunkedArray] column, shared_ptr[CTable]* out) CStatus RemoveColumn(int i, shared_ptr[CTable]* out) - CStatus SetColumn(int i, const shared_ptr[CColumn]& column, + CStatus SetColumn(int i, shared_ptr[CField] field, + shared_ptr[CChunkedArray] column, shared_ptr[CTable]* out) vector[c_string] ColumnNames() @@ -593,6 +580,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: int64_t size() int ndim() + const vector[c_string]& dim_names() const c_string& dim_name(int i) c_bool is_mutable() @@ -600,6 +588,38 @@ cdef extern from "arrow/api.h" 
namespace "arrow" nogil: Type type_id() c_bool Equals(const CTensor& other) + cdef cppclass CSparseTensorCOO" arrow::SparseTensorCOO": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + + const vector[int64_t]& shape() + int64_t size() + int64_t non_zero_length() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + Type type_id() + c_bool Equals(const CSparseTensorCOO& other) + + cdef cppclass CSparseTensorCSR" arrow::SparseTensorCSR": + shared_ptr[CDataType] type() + shared_ptr[CBuffer] data() + + const vector[int64_t]& shape() + int64_t size() + int64_t non_zero_length() + + int ndim() + const vector[c_string]& dim_names() + const c_string& dim_name(int i) + + c_bool is_mutable() + Type type_id() + c_bool Equals(const CSparseTensorCSR& other) + cdef cppclass CScalar" arrow::Scalar": shared_ptr[CDataType] type @@ -1019,7 +1039,7 @@ cdef extern from "arrow/ipc/api.h" namespace "arrow::ipc" nogil: shared_ptr[CSchema] schema() - CStatus GetColumn(int i, shared_ptr[CColumn]* out) + CStatus GetColumn(int i, shared_ptr[CChunkedArray]* out) c_string GetColumnName(int i) CStatus Read(shared_ptr[CTable]* out) @@ -1184,6 +1204,8 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: CMemoryPool* pool c_bool from_pandas + # TODO Some functions below are not actually "nogil" + CStatus ConvertPySequence(object obj, object mask, const PyConversionOptions& options, shared_ptr[CChunkedArray]* out) @@ -1202,11 +1224,38 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: shared_ptr[CChunkedArray]* out) CStatus NdarrayToTensor(CMemoryPool* pool, object ao, + const vector[c_string]& dim_names, shared_ptr[CTensor]* out) CStatus TensorToNdarray(const shared_ptr[CTensor]& tensor, object base, PyObject** out) + CStatus SparseTensorCOOToNdarray( + const shared_ptr[CSparseTensorCOO]& sparse_tensor, object base, + PyObject** out_data, PyObject** out_coords) + + CStatus SparseTensorCSRToNdarray( + const shared_ptr[CSparseTensorCSR]& sparse_tensor, object base, + PyObject** out_data, PyObject** out_indptr, PyObject** out_indices) + + CStatus NdarraysToSparseTensorCOO(CMemoryPool* pool, object data_ao, + object coords_ao, + const vector[int64_t]& shape, + const vector[c_string]& dim_names, + shared_ptr[CSparseTensorCOO]* out) + + CStatus NdarraysToSparseTensorCSR(CMemoryPool* pool, object data_ao, + object indptr_ao, object indices_ao, + const vector[int64_t]& shape, + const vector[c_string]& dim_names, + shared_ptr[CSparseTensorCSR]* out) + + CStatus TensorToSparseTensorCOO(shared_ptr[CTensor], + shared_ptr[CSparseTensorCOO]* out) + + CStatus TensorToSparseTensorCSR(shared_ptr[CTensor], + shared_ptr[CSparseTensorCSR]* out) + CStatus ConvertArrayToPandas(const PandasOptions& options, const shared_ptr[CArray]& arr, object py_ref, PyObject** out) @@ -1215,10 +1264,6 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: const shared_ptr[CChunkedArray]& arr, object py_ref, PyObject** out) - CStatus ConvertColumnToPandas(const PandasOptions& options, - const shared_ptr[CColumn]& arr, - object py_ref, PyObject** out) - CStatus ConvertTableToPandas( const PandasOptions& options, const unordered_set[c_string]& categorical_columns, @@ -1282,6 +1327,11 @@ cdef extern from 'arrow/python/init.h': int arrow_init_numpy() except -1 +cdef extern from 'arrow/python/common.h' namespace "arrow::py": + c_bool IsPyError(const CStatus& status) + void RestorePyError(const CStatus& status) + + cdef extern from 
'arrow/python/pyarrow.h' namespace 'arrow::py': int import_pyarrow() except -1 diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd index 06395741195..ed0b33e2a77 100644 --- a/python/pyarrow/includes/libarrow_flight.pxd +++ b/python/pyarrow/includes/libarrow_flight.pxd @@ -281,7 +281,7 @@ cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil: CStatus Init(CFlightServerOptions& options) CStatus ServeWithSignals() except * - void Shutdown() + CStatus Shutdown() cdef cppclass PyServerAuthHandler\ " arrow::py::flight::PyServerAuthHandler"(CServerAuthHandler): diff --git a/python/pyarrow/includes/libplasma.pxd b/python/pyarrow/includes/libplasma.pxd new file mode 100644 index 00000000000..1b84ab4e0a6 --- /dev/null +++ b/python/pyarrow/includes/libplasma.pxd @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# distutils: language = c++ + +from pyarrow.includes.common cimport * + +cdef extern from "plasma/common.h" namespace "plasma" nogil: + cdef c_bool IsPlasmaObjectExists(const CStatus& status) + cdef c_bool IsPlasmaObjectNonexistent(const CStatus& status) + cdef c_bool IsPlasmaStoreFull(const CStatus& status) diff --git a/python/pyarrow/ipc.pxi b/python/pyarrow/ipc.pxi index cfd1cd71ac2..14a8b5379b8 100644 --- a/python/pyarrow/ipc.pxi +++ b/python/pyarrow/ipc.pxi @@ -536,7 +536,7 @@ def read_schema(obj, DictionaryMemo dictionary_memo=None): get_reader(obj, True, &cpp_file) if dictionary_memo is not None: - arg_dict_memo = &dictionary_memo.memo + arg_dict_memo = dictionary_memo.memo else: arg_dict_memo = &temp_memo @@ -575,7 +575,7 @@ def read_record_batch(obj, Schema schema, message = read_message(obj) if dictionary_memo is not None: - arg_dict_memo = &dictionary_memo.memo + arg_dict_memo = dictionary_memo.memo else: arg_dict_memo = &temp_memo diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 79ab9478b16..09314630f2e 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -74,7 +74,10 @@ cdef class StructType(DataType): cdef class DictionaryMemo: cdef: - CDictionaryMemo memo + # Even though the CDictionaryMemo instance is private, we allocate + # it on the heap so as to avoid C++ ABI issues with Python wheels. 
+ shared_ptr[CDictionaryMemo] sp_memo + CDictionaryMemo* memo cdef class DictionaryType(DataType): @@ -231,6 +234,28 @@ cdef class Tensor: cdef void init(self, const shared_ptr[CTensor]& sp_tensor) +cdef class SparseTensorCSR: + cdef: + shared_ptr[CSparseTensorCSR] sp_sparse_tensor + CSparseTensorCSR* stp + + cdef readonly: + DataType type + + cdef void init(self, const shared_ptr[CSparseTensorCSR]& sp_sparse_tensor) + + +cdef class SparseTensorCOO: + cdef: + shared_ptr[CSparseTensorCOO] sp_sparse_tensor + CSparseTensorCOO* stp + + cdef readonly: + DataType type + + cdef void init(self, const shared_ptr[CSparseTensorCOO]& sp_sparse_tensor) + + cdef class NullArray(Array): pass @@ -347,14 +372,6 @@ cdef class ChunkedArray(_PandasConvertible): cdef getitem(self, int64_t i) -cdef class Column(_PandasConvertible): - cdef: - shared_ptr[CColumn] sp_column - CColumn* column - - cdef void init(self, const shared_ptr[CColumn]& column) - - cdef class Table(_PandasConvertible): cdef: shared_ptr[CTable] sp_table @@ -444,7 +461,6 @@ cdef public object pyarrow_wrap_chunked_array( # XXX pyarrow.h calls it `wrap_record_batch` cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch) cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf) -cdef public object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn) cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type) cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field) cdef public object pyarrow_wrap_resizable_buffer( @@ -452,13 +468,20 @@ cdef public object pyarrow_wrap_resizable_buffer( cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type) cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable) cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor) +cdef public object pyarrow_wrap_sparse_tensor_coo( + const shared_ptr[CSparseTensorCOO]& sp_sparse_tensor) +cdef public object pyarrow_wrap_sparse_tensor_csr( + const shared_ptr[CSparseTensorCSR]& sp_sparse_tensor) cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array) cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch) cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer) -cdef public shared_ptr[CColumn] pyarrow_unwrap_column(object column) cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type) cdef public shared_ptr[CField] pyarrow_unwrap_field(object field) cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema) cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table) cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor) +cdef public shared_ptr[CSparseTensorCOO] pyarrow_unwrap_sparse_tensor_coo( + object sparse_tensor) +cdef public shared_ptr[CSparseTensorCSR] pyarrow_unwrap_sparse_tensor_csr( + object sparse_tensor) diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 783e2b2731a..2da5a8301bc 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -121,6 +121,9 @@ include "builder.pxi" # Column, Table, Record Batch include "table.pxi" +# Tensors +include "tensor.pxi" + # File IO include "io.pxi" include "io-hdfs.pxi" diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 439b6fe16d9..40598b642dc 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -29,7 +29,8 @@ import pyarrow as pa from pyarrow.lib import _pandas_api -from pyarrow.compat import (builtin_pickle, PY2, zip_longest, 
Sequence) # noqa +from pyarrow.compat import (builtin_pickle, # noqa + PY2, zip_longest, Sequence, u_utf8) _logical_type_map = {} @@ -668,7 +669,7 @@ def _check_data_column_metadata_consistency(all_columns): def _deserialize_column_index(block_table, all_columns, column_indexes): - column_strings = [x.name for x in block_table.itercolumns()] + column_strings = [u_utf8(x) for x in block_table.column_names] if all_columns: columns_name_dict = { c.get('field_name', _column_name_to_strings(c['name'])): c['name'] @@ -770,21 +771,21 @@ def _extract_index_level(table, result_table, field_name, # The serialized index column was removed by the user return table, None, None + pd = _pandas_api.pd + col = table.column(i) - col_pandas = col.to_pandas() - values = col_pandas.values + values = col.to_pandas() + if hasattr(values, 'flags') and not values.flags.writeable: # ARROW-1054: in pandas 0.19.2, factorize will reject # non-writeable arrays when calling MultiIndex.from_arrays values = values.copy() - pd = _pandas_api.pd - - if _pandas_api.is_datetimetz(col_pandas.dtype): + if isinstance(col.type, pa.lib.TimestampType): index_level = (pd.Series(values).dt.tz_localize('utc') - .dt.tz_convert(col_pandas.dtype.tz)) + .dt.tz_convert(col.type.tz)) else: - index_level = pd.Series(values, dtype=col_pandas.dtype) + index_level = pd.Series(values, dtype=values.dtype) result_table = result_table.remove_column( result_table.schema.get_field_index(field_name) ) @@ -899,6 +900,7 @@ def _reconstruct_columns_from_metadata(columns, column_indexes): new_levels = [] encoder = operator.methodcaller('encode', 'UTF-8') + for level, pandas_dtype in levels_dtypes: dtype = _pandas_type_to_numpy_type(pandas_dtype) @@ -944,6 +946,7 @@ def _flatten_single_level_multiindex(index): def _add_any_metadata(table, pandas_metadata): modified_columns = {} + modified_fields = {} schema = table.schema @@ -971,20 +974,23 @@ def _add_any_metadata(table, pandas_metadata): converted = col.to_pandas() tz = col_meta['metadata']['timezone'] tz_aware_type = pa.timestamp('ns', tz=tz) - with_metadata = pa.Array.from_pandas(converted.values, + with_metadata = pa.Array.from_pandas(converted, type=tz_aware_type) - field = pa.field(schema[idx].name, tz_aware_type) - modified_columns[idx] = pa.Column.from_array(field, - with_metadata) + modified_fields[idx] = pa.field(schema[idx].name, + tz_aware_type) + modified_columns[idx] = with_metadata if len(modified_columns) > 0: columns = [] + fields = [] for i in range(len(table.schema)): if i in modified_columns: columns.append(modified_columns[i]) + fields.append(modified_fields[i]) else: columns.append(table[i]) - return pa.Table.from_arrays(columns) + fields.append(table.schema[i]) + return pa.Table.from_arrays(columns, schema=pa.schema(fields)) else: return table diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py index 2d780afe79b..c3199db95d3 100644 --- a/python/pyarrow/parquet.py +++ b/python/pyarrow/parquet.py @@ -605,9 +605,8 @@ def read(self, columns=None, use_threads=True, partitions=None, # manifest, so ['a', 'b', 'c'] as in our example above. 
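            # For illustration (hypothetical values): with dictionary
            # ['a', 'b', 'c'], indices [0, 0, 1] decode to the partition
            # values ['a', 'a', 'b'] without materializing one string
            # object per row.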
dictionary = partitions.levels[i].dictionary - arr = lib.DictionaryArray.from_arrays(indices, dictionary) - col = lib.Column.from_array(name, arr) - table = table.append_column(col) + arr = pa.DictionaryArray.from_arrays(indices, dictionary) + table = table.append_column(name, arr) return table diff --git a/python/pyarrow/plasma.py b/python/pyarrow/plasma.py index 748de97c363..43ca471e0b2 100644 --- a/python/pyarrow/plasma.py +++ b/python/pyarrow/plasma.py @@ -27,7 +27,9 @@ import time from pyarrow._plasma import (ObjectID, ObjectNotAvailable, # noqa - PlasmaBuffer, PlasmaClient, connect) + PlasmaBuffer, PlasmaClient, connect, + PlasmaObjectExists, PlasmaObjectNonexistent, + PlasmaStoreFull) # The Plasma TensorFlow Operator needs to be compiled on the end user's diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi index 33bc8031804..f6ef2c955cd 100644 --- a/python/pyarrow/public-api.pxi +++ b/python/pyarrow/public-api.pxi @@ -16,9 +16,10 @@ # under the License. from libcpp.memory cimport shared_ptr -from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField, +from pyarrow.includes.libarrow cimport (CArray, CDataType, CField, CRecordBatch, CSchema, - CTable, CTensor) + CTable, CTensor, + CSparseTensorCSR, CSparseTensorCOO) # You cannot assign something to a dereferenced pointer in Cython thus these # methods don't use Status to indicate a successful operation. @@ -225,6 +226,7 @@ cdef api object pyarrow_wrap_scalar(const shared_ptr[CScalar]& sp_scalar): scalar.init(sp_scalar) return scalar + cdef api bint pyarrow_is_tensor(object tensor): return isinstance(tensor, Tensor) @@ -248,23 +250,50 @@ cdef api object pyarrow_wrap_tensor( return tensor -cdef api bint pyarrow_is_column(object column): - return isinstance(column, Column) +cdef api bint pyarrow_is_sparse_tensor_coo(object sparse_tensor): + return isinstance(sparse_tensor, SparseTensorCOO) + +cdef api shared_ptr[CSparseTensorCOO] pyarrow_unwrap_sparse_tensor_coo( + object sparse_tensor): + cdef SparseTensorCOO sten + if pyarrow_is_sparse_tensor_coo(sparse_tensor): + sten = (sparse_tensor) + return sten.sp_sparse_tensor + + return shared_ptr[CSparseTensorCOO]() + +cdef api object pyarrow_wrap_sparse_tensor_coo( + const shared_ptr[CSparseTensorCOO]& sp_sparse_tensor): + if sp_sparse_tensor.get() == NULL: + raise ValueError('SparseTensorCOO was NULL') + + cdef SparseTensorCOO sparse_tensor = SparseTensorCOO.__new__( + SparseTensorCOO) + sparse_tensor.init(sp_sparse_tensor) + return sparse_tensor + +cdef api bint pyarrow_is_sparse_tensor_csr(object sparse_tensor): + return isinstance(sparse_tensor, SparseTensorCSR) -cdef api shared_ptr[CColumn] pyarrow_unwrap_column(object column): - cdef Column col - if pyarrow_is_column(column): - col = (column) - return col.sp_column +cdef api shared_ptr[CSparseTensorCSR] pyarrow_unwrap_sparse_tensor_csr( + object sparse_tensor): + cdef SparseTensorCSR sten + if pyarrow_is_sparse_tensor_csr(sparse_tensor): + sten = (sparse_tensor) + return sten.sp_sparse_tensor - return shared_ptr[CColumn]() + return shared_ptr[CSparseTensorCSR]() +cdef api object pyarrow_wrap_sparse_tensor_csr( + const shared_ptr[CSparseTensorCSR]& sp_sparse_tensor): + if sp_sparse_tensor.get() == NULL: + raise ValueError('SparseTensorCSR was NULL') -cdef api object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn): - cdef Column column = Column.__new__(Column) - column.init(ccolumn) - return column + cdef SparseTensorCSR sparse_tensor = SparseTensorCSR.__new__( + SparseTensorCSR) + 
sparse_tensor.init(sp_sparse_tensor) + return sparse_tensor cdef api bint pyarrow_is_table(object table): diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 0a76ddbc6e5..c0782fe26c2 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -38,6 +38,14 @@ cdef class ChunkedArray(_PandasConvertible): def __reduce__(self): return chunked_array, (self.chunks, self.type) + @property + def data(self): + import warnings + warnings.warn("Calling .data on ChunkedArray is provided for " + "compatibility after Column was removed, simply drop " + "this attribute", FutureWarning) + return self + @property def type(self): return pyarrow_wrap_data_type(self.sp_chunked_array.get().type()) @@ -153,6 +161,33 @@ cdef class ChunkedArray(_PandasConvertible): return self.to_pandas() return self.to_pandas().astype(dtype) + def cast(self, object target_type, bint safe=True): + """ + Cast values to another data type + + Parameters + ---------- + target_type : DataType + Type to cast to + safe : boolean, default True + Check for overflows or other unsafe conversions + + Returns + ------- + casted : ChunkedArray + """ + cdef: + CCastOptions options = CCastOptions(safe) + DataType type = ensure_type(target_type) + shared_ptr[CArray] result + CDatum out + + with nogil: + check_status(Cast(_context(), CDatum(self.sp_chunked_array), + type.sp_type, options, &out)) + + return pyarrow_wrap_chunked_array(out.chunked_array()) + def dictionary_encode(self): """ Compute dictionary-encoded representation of array @@ -171,6 +206,29 @@ cdef class ChunkedArray(_PandasConvertible): return wrap_datum(out) + def flatten(self, MemoryPool memory_pool=None): + """ + Flatten this ChunkedArray. If it has a struct type, the column is + flattened into one array per struct field. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool + + Returns + ------- + result : List[ChunkedArray] + """ + cdef: + vector[shared_ptr[CChunkedArray]] flattened + CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) + + with nogil: + check_status(self.chunked_array.Flatten(pool, &flattened)) + + return [pyarrow_wrap_chunked_array(col) for col in flattened] + def unique(self): """ Compute distinct elements in array @@ -267,7 +325,7 @@ def chunked_array(arrays, type=None): Parameters ---------- - arrays : list of Array or values coercible to arrays + arrays : Array, list of Array, or values coercible to arrays Must all be the same data type. 
Can be empty only if type also passed type : DataType or string coercible to DataType @@ -282,6 +340,9 @@ def chunked_array(arrays, type=None): shared_ptr[CChunkedArray] sp_chunked_array shared_ptr[CDataType] sp_data_type + if isinstance(arrays, Array): + arrays = [arrays] + for x in arrays: if isinstance(x, Array): arr = x @@ -307,287 +368,10 @@ def chunked_array(arrays, type=None): return pyarrow_wrap_chunked_array(sp_chunked_array) -def column(object field_or_name, arr): - """ - Create Column object from field/string and array-like data - - Parameters - ---------- - field_or_name : string or Field - arr : Array, list of Arrays, or ChunkedArray - - Returns - ------- - column : Column - """ - cdef: - Field boxed_field - Array _arr - ChunkedArray _carr - shared_ptr[CColumn] sp_column - - if isinstance(arr, list): - arr = chunked_array(arr) - elif not isinstance(arr, (Array, ChunkedArray)): - arr = array(arr) - - if isinstance(field_or_name, Field): - boxed_field = field_or_name - if arr.type != boxed_field.type: - raise ValueError('Passed field type does not match array') - else: - boxed_field = field(field_or_name, arr.type) - - if isinstance(arr, Array): - _arr = arr - sp_column.reset(new CColumn(boxed_field.sp_field, _arr.sp_array)) - elif isinstance(arr, ChunkedArray): - _carr = arr - sp_column.reset(new CColumn(boxed_field.sp_field, - _carr.sp_chunked_array)) - else: - raise ValueError("Unsupported type for column(...): {}" - .format(type(arr))) - - return pyarrow_wrap_column(sp_column) - - -cdef class Column(_PandasConvertible): - """ - Named vector of elements of equal type. - - Warning - ------- - Do not call this class's constructor directly. - """ - - def __cinit__(self): - self.column = NULL - - def __init__(self): - raise TypeError("Do not call Column's constructor directly, use one " - "of the `Column.from_*` functions instead.") - - cdef void init(self, const shared_ptr[CColumn]& column): - self.sp_column = column - self.column = column.get() - - def __reduce__(self): - return column, (self.field, self.data) - - def __repr__(self): - from pyarrow.compat import StringIO - result = StringIO() - result.write('' - .format(self.name, self.type)) - result.write('\n{}'.format(str(self.data))) - - return result.getvalue() - - def __getitem__(self, key): - return self.data[key] - - @staticmethod - def from_array(*args): - return column(*args) - - def cast(self, object target_type, bint safe=True): - """ - Cast column values to another data type - - Parameters - ---------- - target_type : DataType - Type to cast to - safe : boolean, default True - Check for overflows or other unsafe conversions - - Returns - ------- - casted : Column - """ - cdef: - CCastOptions options = CCastOptions(safe) - DataType type = ensure_type(target_type) - shared_ptr[CArray] result - CDatum out - - with nogil: - check_status(Cast(_context(), CDatum(self.column.data()), - type.sp_type, options, &out)) - - casted_data = pyarrow_wrap_chunked_array(out.chunked_array()) - return column(self.name, casted_data) - - def dictionary_encode(self): - """ - Compute dictionary-encoded representation of array - - Returns - ------- - pyarrow.Column - Same chunking as the input, all chunks share a common dictionary. - """ - ca = self.data.dictionary_encode() - return column(self.name, ca) - - def unique(self): - """ - Compute distinct elements in array - - Returns - ------- - pyarrow.Array - """ - return self.data.unique() - - def flatten(self, MemoryPool memory_pool=None): - """ - Flatten this Column. 
If it has a struct type, the column is - flattened into one column per struct field. - - Parameters - ---------- - memory_pool : MemoryPool, default None - For memory allocations, if required, otherwise use default pool - - Returns - ------- - result : List[Column] - """ - cdef: - vector[shared_ptr[CColumn]] flattened - CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) - - with nogil: - check_status(self.column.Flatten(pool, &flattened)) - - return [pyarrow_wrap_column(col) for col in flattened] - - def _to_pandas(self, options, **kwargs): - values = self.data._to_pandas(options) - result = pandas_api.make_series(values, name=self.name) - - if isinstance(self.type, TimestampType): - tz = self.type.tz - if tz is not None: - tz = string_to_tzinfo(tz) - result = (result.dt.tz_localize('utc') - .dt.tz_convert(tz)) - - return result - - def __array__(self, dtype=None): - return self.data.__array__(dtype=dtype) - - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented - - def equals(self, Column other): - """ - Check if contents of two columns are equal - - Parameters - ---------- - other : pyarrow.Column - - Returns - ------- - are_equal : boolean - """ - cdef: - CColumn* this_col = self.column - CColumn* other_col = other.column - c_bool result - - if other is None: - return False - - with nogil: - result = this_col.Equals(deref(other_col)) - - return result - - def to_pylist(self): - """ - Convert to a list of native Python objects. - """ - return self.data.to_pylist() - - def __len__(self): - return self.length() - - def length(self): - return self.column.length() - - @property - def field(self): - return pyarrow_wrap_field(self.column.field()) - - @property - def shape(self): - """ - Dimensions of this columns - - Returns - ------- - (int,) - """ - return (self.length(),) - - @property - def null_count(self): - """ - Number of null entires - - Returns - ------- - int - """ - return self.column.null_count() - - @property - def name(self): - """ - Label of the column - - Returns - ------- - str - """ - return bytes(self.column.name()).decode('utf8') - - @property - def type(self): - """ - Type information for this column - - Returns - ------- - pyarrow.DataType - """ - return pyarrow_wrap_data_type(self.column.type()) - - @property - def data(self): - """ - The underlying data - - Returns - ------- - pyarrow.ChunkedArray - """ - return pyarrow_wrap_chunked_array(self.column.data()) - - cdef _schema_from_arrays(arrays, names, metadata, shared_ptr[CSchema]* schema): cdef: Py_ssize_t K = len(arrays) c_string c_name - CColumn* c_column shared_ptr[CDataType] c_type shared_ptr[CKeyValueMetadata] c_meta vector[shared_ptr[CField]] c_fields @@ -603,29 +387,24 @@ cdef _schema_from_arrays(arrays, names, metadata, shared_ptr[CSchema]* schema): c_fields.resize(K) - if isinstance(arrays[0], Column): - for i in range(K): - c_column = (arrays[i]).column - c_fields[i] = c_column.field() - else: - if names is None: - raise ValueError('Must pass names when constructing ' - 'from Array objects') - if len(names) != K: - raise ValueError('Length of names ({}) does not match ' - 'length of arrays ({})'.format(len(names), K)) - for i in range(K): - val = arrays[i] - if isinstance(val, (Array, ChunkedArray)): - c_type = ( val.type).sp_type - else: - raise TypeError(type(val)) + if names is None: + raise ValueError('Must pass names or schema to Table.from_arrays') - if names[i] is None: - c_name = tobytes(u'None') - else: - c_name = tobytes(names[i]) - 
c_fields[i].reset(new CField(c_name, c_type, True)) + if len(names) != K: + raise ValueError('Length of names ({}) does not match ' + 'length of arrays ({})'.format(len(names), K)) + for i in range(K): + val = arrays[i] + if isinstance(val, (Array, ChunkedArray)): + c_type = ( val.type).sp_type + else: + raise TypeError(type(val)) + + if names[i] is None: + c_name = tobytes(u'None') + else: + c_name = tobytes(names[i]) + c_fields[i].reset(new CField(c_name, c_type, True)) schema.reset(new CSchema(c_fields, c_meta)) @@ -732,7 +511,7 @@ cdef class RecordBatch(_PandasConvertible): Returns ------- - list of pa.Column + list of pa.ChunkedArray """ return [self.column(i) for i in range(self.num_columns)] @@ -975,7 +754,7 @@ cdef class Table(_PandasConvertible): def __reduce__(self): # Reduce the columns as ChunkedArrays to avoid serializing schema # data twice - columns = [col.data for col in self.columns] + columns = [col for col in self.columns] return _reconstruct_table, (columns, self.schema) def replace_schema_metadata(self, metadata=None): @@ -1101,7 +880,7 @@ cdef class Table(_PandasConvertible): casted : Table """ cdef: - Column column, casted + ChunkedArray column, casted Field field list newcols = [] @@ -1184,17 +963,16 @@ cdef class Table(_PandasConvertible): @staticmethod def from_arrays(arrays, names=None, schema=None, metadata=None): """ - Construct a Table from Arrow arrays or columns + Construct a Table from Arrow arrays Parameters ---------- - arrays : list of pyarrow.Array or pyarrow.Column + arrays : list of pyarrow.Array or pyarrow.ChunkedArray Equal-length arrays that should form the table. names : list of str, optional - Names for the table columns. If Columns passed, will be - inferred. If Arrays passed, this argument is required + Names for the table columns. If not passed, schema must be passed schema : Schema, default None - If not passed, will be inferred from the arrays + Schema for the created table. If not passed, names must be passed metadata : dict or Mapping, default None Optional metadata for the schema (if inferred). 
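+
+        Examples
+        --------
+        A minimal sketch (values illustrative; one of ``names`` or
+        ``schema`` must be supplied)::
+
+            a = pa.array([1, 2, 3])
+            b = pa.chunked_array([['x', 'y'], ['z']])
+            t = pa.Table.from_arrays([a, b], names=['a', 'b'])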
@@ -1204,7 +982,7 @@ cdef class Table(_PandasConvertible): """ cdef: - vector[shared_ptr[CColumn]] columns + vector[shared_ptr[CChunkedArray]] columns Schema cy_schema shared_ptr[CSchema] c_schema int i, K = len(arrays) @@ -1228,26 +1006,12 @@ cdef class Table(_PandasConvertible): for i in range(K): if isinstance(arrays[i], Array): columns.push_back( - make_shared[CColumn]( - c_schema.get().field(i), + make_shared[CChunkedArray]( ( arrays[i]).sp_array ) ) elif isinstance(arrays[i], ChunkedArray): - columns.push_back( - make_shared[CColumn]( - c_schema.get().field(i), - ( arrays[i]).sp_chunked_array - ) - ) - elif isinstance(arrays[i], Column): - # Make sure schema field and column are consistent - columns.push_back( - make_shared[CColumn]( - c_schema.get().field(i), - ( arrays[i]).sp_column.get().data() - ) - ) + columns.push_back(( arrays[i]).sp_chunked_array) else: raise TypeError(type(arrays[i])) @@ -1272,18 +1036,27 @@ cdef class Table(_PandasConvertible): pyarrow.Table """ - names = [] arrays = [] - for k, v in mapping.items(): - names.append(k) - if not isinstance(v, (Array, ChunkedArray)): - v = array(v) - arrays.append(v) - if schema is None: - return Table.from_arrays(arrays, names, metadata=metadata) - else: + if schema is not None: + for field in schema: + try: + v = mapping[field.name] + except KeyError as e: + try: + v = mapping[tobytes(field.name)] + except KeyError as e2: + raise e + arrays.append(array(v, type=field.type)) # Will raise if metadata is not None return Table.from_arrays(arrays, schema=schema, metadata=metadata) + else: + names = [] + for k, v in mapping.items(): + names.append(k) + if not isinstance(v, (Array, ChunkedArray)): + v = array(v) + arrays.append(v) + return Table.from_arrays(arrays, names, metadata=metadata) @staticmethod def from_batches(batches, Schema schema=None): @@ -1381,11 +1154,11 @@ cdef class Table(_PandasConvertible): size_t i size_t num_columns = self.table.num_columns() list entries = [] - Column column + ChunkedArray column for i in range(num_columns): column = self.column(i) - entries.append((column.name, column.to_pylist())) + entries.append((self.field(i).name, column.to_pylist())) return ordered_dict(entries) @@ -1400,6 +1173,32 @@ cdef class Table(_PandasConvertible): """ return pyarrow_wrap_schema(self.table.schema()) + def field(self, i): + """ + Select a schema field by its numeric index. + + Parameters + ---------- + i : int or string + + Returns + ------- + pyarrow.Field + """ + cdef: + int num_columns = self.num_columns + int index + + if not -num_columns <= i < num_columns: + raise IndexError( + 'Table column index {:d} is out of range'.format(i) + ) + + index = i if i >= 0 else num_columns + i + assert index >= 0 + + return pyarrow_wrap_field(self.table.field(index)) + def column(self, i): """ Select a column by its column name, or numeric index. 
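+
+        For a table ``t`` (a sketch; columns now come back as
+        ChunkedArray rather than Column)::
+
+            t.column('a')                   # by name
+            t.column(0).cast(pa.float64())  # by index, then cast
+            t.field(0).name                 # name lives on the field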
@@ -1410,7 +1209,7 @@ cdef class Table(_PandasConvertible): Returns ------- - pyarrow.Column + pyarrow.ChunkedArray """ if isinstance(i, six.string_types): field_index = self.schema.get_field_index(i) @@ -1433,7 +1232,7 @@ cdef class Table(_PandasConvertible): Returns ------- - pyarrow.Column + pyarrow.ChunkedArray """ cdef: int num_columns = self.num_columns @@ -1447,7 +1246,7 @@ cdef class Table(_PandasConvertible): index = i if i >= 0 else num_columns + i assert index >= 0 - return pyarrow_wrap_column(self.table.column(index)) + return pyarrow_wrap_chunked_array(self.table.column(index)) def __getitem__(self, key): cdef int index = _normalize_index(key, self.num_columns) @@ -1467,7 +1266,7 @@ cdef class Table(_PandasConvertible): Returns ------- - list of pa.Column + list of pa.ChunkedArray """ return [self._column(i) for i in range(self.num_columns)] @@ -1510,22 +1309,37 @@ cdef class Table(_PandasConvertible): """ return (self.num_rows, self.num_columns) - def add_column(self, int i, Column column): + def add_column(self, int i, field_, column): """ Add column to Table at position. Returns new table """ - cdef shared_ptr[CTable] c_table + cdef: + shared_ptr[CTable] c_table + Field c_field + ChunkedArray c_arr + + if isinstance(column, ChunkedArray): + c_arr = column + else: + c_arr = chunked_array(column) + + if isinstance(field_, Field): + c_field = field_ + else: + c_field = field(field_, c_arr.type) with nogil: - check_status(self.table.AddColumn(i, column.sp_column, &c_table)) + check_status(self.table.AddColumn(i, c_field.sp_field, + c_arr.sp_chunked_array, + &c_table)) return pyarrow_wrap_table(c_table) - def append_column(self, Column column): + def append_column(self, field_, column): """ Append column at end of columns. Returns new table """ - return self.add_column(self.num_columns, column) + return self.add_column(self.num_columns, field_, column) def remove_column(self, int i): """ @@ -1538,14 +1352,29 @@ cdef class Table(_PandasConvertible): return pyarrow_wrap_table(c_table) - def set_column(self, int i, Column column): + def set_column(self, int i, field_, column): """ Replace column in Table at position. Returns new table """ - cdef shared_ptr[CTable] c_table + cdef: + shared_ptr[CTable] c_table + Field c_field + ChunkedArray c_arr + + if isinstance(column, ChunkedArray): + c_arr = column + else: + c_arr = chunked_array(column) + + if isinstance(field_, Field): + c_field = field_ + else: + c_field = field(field_, c_arr.type) with nogil: - check_status(self.table.SetColumn(i, column.sp_column, &c_table)) + check_status(self.table.SetColumn(i, c_field.sp_field, + c_arr.sp_chunked_array, + &c_table)) return pyarrow_wrap_table(c_table) diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi new file mode 100644 index 00000000000..17554e61740 --- /dev/null +++ b/python/pyarrow/tensor.pxi @@ -0,0 +1,367 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+cdef class Tensor:
+    """
+    An n-dimensional array, a.k.a. Tensor.
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call Tensor's constructor directly, use one "
+                        "of the `pyarrow.Tensor.from_*` functions instead.")
+
+    cdef void init(self, const shared_ptr[CTensor]& sp_tensor):
+        self.sp_tensor = sp_tensor
+        self.tp = sp_tensor.get()
+        self.type = pyarrow_wrap_data_type(self.tp.type())
+
+    def __repr__(self):
+        return """
+type: {0.type}
+shape: {0.shape}
+strides: {0.strides}""".format(self)
+
+    @staticmethod
+    def from_numpy(obj, dim_names=None):
+        cdef:
+            vector[c_string] c_dim_names
+            shared_ptr[CTensor] ctensor
+
+        if dim_names is not None:
+            for x in dim_names:
+                c_dim_names.push_back(tobytes(x))
+
+        check_status(NdarrayToTensor(c_default_memory_pool(), obj,
+                                     c_dim_names, &ctensor))
+        return pyarrow_wrap_tensor(ctensor)
+
+    def to_numpy(self):
+        """
+        Convert arrow::Tensor to numpy.ndarray with zero copy
+        """
+        cdef PyObject* out
+
+        check_status(TensorToNdarray(self.sp_tensor, self, &out))
+        return PyObject_to_object(out)
+
+    def equals(self, Tensor other):
+        """
+        Return true if the tensors contain exactly equal data
+        """
+        return self.tp.Equals(deref(other.tp))
+
+    def __eq__(self, other):
+        if isinstance(other, Tensor):
+            return self.equals(other)
+        else:
+            return NotImplemented
+
+    def dim_name(self, i):
+        return frombytes(self.tp.dim_name(i))
+
+    @property
+    def dim_names(self):
+        return [frombytes(x) for x in tuple(self.tp.dim_names())]
+
+    @property
+    def is_mutable(self):
+        return self.tp.is_mutable()
+
+    @property
+    def is_contiguous(self):
+        return self.tp.is_contiguous()
+
+    @property
+    def ndim(self):
+        return self.tp.ndim()
+
+    @property
+    def size(self):
+        return self.tp.size()
+
+    @property
+    def shape(self):
+        # Cython knows how to convert a vector[T] to a Python list
+        return tuple(self.tp.shape())
+
+    @property
+    def strides(self):
+        return tuple(self.tp.strides())
+
+    def __getbuffer__(self, cp.Py_buffer* buffer, int flags):
+        buffer.buf = <char *> self.tp.data().get().data()
+        pep3118_format = self.type.pep3118_format
+        if pep3118_format is None:
+            raise NotImplementedError("type %s not supported for buffer "
+                                      "protocol" % (self.type,))
+        buffer.format = pep3118_format
+        buffer.itemsize = self.type.bit_width // 8
+        buffer.internal = NULL
+        buffer.len = self.tp.size() * buffer.itemsize
+        buffer.ndim = self.tp.ndim()
+        buffer.obj = self
+        if self.tp.is_mutable():
+            buffer.readonly = 0
+        else:
+            buffer.readonly = 1
+        # NOTE: This assumes Py_ssize_t == int64_t, and that the shape
+        # and strides arrays' lifetime is tied to the tensor's
+        buffer.shape = &self.tp.shape()[0]
+        buffer.strides = &self.tp.strides()[0]
+        buffer.suboffsets = NULL
+
+
+cdef class SparseTensorCOO:
+    """
+    A sparse COO tensor.
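+
+    A minimal usage sketch (illustrative values; ``data`` holds the
+    non-zero entries and ``coords`` one row of indices per entry)::
+
+        data = np.array([1.0, 2.0, 3.0])
+        coords = np.array([[0, 0], [1, 1], [2, 2]])
+        st = pa.SparseTensorCOO.from_numpy(data, coords, shape=(3, 3))
+        st.non_zero_length   # 3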
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call SparseTensorCOO's constructor directly, "
+                        "use one of the `pyarrow.SparseTensorCOO.from_*` "
+                        "functions instead.")
+
+    cdef void init(self, const shared_ptr[CSparseTensorCOO]& sp_sparse_tensor):
+        self.sp_sparse_tensor = sp_sparse_tensor
+        self.stp = sp_sparse_tensor.get()
+        self.type = pyarrow_wrap_data_type(self.stp.type())
+
+    def __repr__(self):
+        return """
+type: {0.type}
+shape: {0.shape}""".format(self)
+
+    @classmethod
+    def from_dense_numpy(cls, obj, dim_names=None):
+        """
+        Convert numpy.ndarray to arrow::SparseTensorCOO
+        """
+        return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
+
+    @staticmethod
+    def from_numpy(data, coords, shape, dim_names=None):
+        """
+        Create arrow::SparseTensorCOO from numpy.ndarrays
+        """
+        cdef shared_ptr[CSparseTensorCOO] csparse_tensor
+        cdef vector[int64_t] c_shape
+        cdef vector[c_string] c_dim_names
+
+        for x in shape:
+            c_shape.push_back(x)
+        if dim_names is not None:
+            for x in dim_names:
+                c_dim_names.push_back(tobytes(x))
+
+        # Enforce precondition for SparseTensorCOO indices
+        coords = np.require(coords, dtype='i8', requirements='F')
+        if coords.ndim != 2:
+            raise ValueError("Expected 2-dimensional array for "
+                             "SparseTensorCOO indices")
+
+        check_status(NdarraysToSparseTensorCOO(c_default_memory_pool(),
+                     data, coords, c_shape, c_dim_names, &csparse_tensor))
+        return pyarrow_wrap_sparse_tensor_coo(csparse_tensor)
+
+    @staticmethod
+    def from_tensor(obj):
+        """
+        Convert arrow::Tensor to arrow::SparseTensorCOO
+        """
+        cdef shared_ptr[CSparseTensorCOO] csparse_tensor
+        cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)
+
+        with nogil:
+            check_status(TensorToSparseTensorCOO(ctensor, &csparse_tensor))
+
+        return pyarrow_wrap_sparse_tensor_coo(csparse_tensor)
+
+    def to_numpy(self):
+        """
+        Convert arrow::SparseTensorCOO to numpy.ndarrays with zero copy
+        """
+        cdef PyObject* out_data
+        cdef PyObject* out_coords
+
+        check_status(SparseTensorCOOToNdarray(self.sp_sparse_tensor, self,
+                                              &out_data, &out_coords))
+        return PyObject_to_object(out_data), PyObject_to_object(out_coords)
+
+    def equals(self, SparseTensorCOO other):
+        """
+        Return true if the sparse tensors contain exactly equal data
+        """
+        return self.stp.Equals(deref(other.stp))
+
+    def __eq__(self, other):
+        if isinstance(other, SparseTensorCOO):
+            return self.equals(other)
+        else:
+            return NotImplemented
+
+    @property
+    def is_mutable(self):
+        return self.stp.is_mutable()
+
+    @property
+    def ndim(self):
+        return self.stp.ndim()
+
+    @property
+    def shape(self):
+        # Cython knows how to convert a vector[T] to a Python list
+        return tuple(self.stp.shape())
+
+    @property
+    def size(self):
+        return self.stp.size()
+
+    def dim_name(self, i):
+        return frombytes(self.stp.dim_name(i))
+
+    @property
+    def dim_names(self):
+        return [frombytes(x) for x in tuple(self.stp.dim_names())]
+
+    @property
+    def non_zero_length(self):
+        return self.stp.non_zero_length()
+
+
+cdef class SparseTensorCSR:
+    """
+    A sparse CSR tensor.
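+
+    A minimal usage sketch (illustrative values, mirroring the
+    (data, indptr, indices) layout of scipy.sparse.csr_matrix)::
+
+        data = np.array([1.0, 2.0, 3.0])
+        indptr = np.array([0, 1, 2, 3])
+        indices = np.array([0, 1, 2])
+        st = pa.SparseTensorCSR.from_numpy(data, indptr, indices,
+                                           shape=(3, 3))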
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call SparseTensorCSR's constructor directly, "
+                        "use one of the `pyarrow.SparseTensorCSR.from_*` "
+                        "functions instead.")
+
+    cdef void init(self, const shared_ptr[CSparseTensorCSR]& sp_sparse_tensor):
+        self.sp_sparse_tensor = sp_sparse_tensor
+        self.stp = sp_sparse_tensor.get()
+        self.type = pyarrow_wrap_data_type(self.stp.type())
+
+    def __repr__(self):
+        return """
+type: {0.type}
+shape: {0.shape}""".format(self)
+
+    @classmethod
+    def from_dense_numpy(cls, obj, dim_names=None):
+        """
+        Convert numpy.ndarray to arrow::SparseTensorCSR
+        """
+        return cls.from_tensor(Tensor.from_numpy(obj, dim_names=dim_names))
+
+    @staticmethod
+    def from_numpy(data, indptr, indices, shape, dim_names=None):
+        """
+        Create arrow::SparseTensorCSR from numpy.ndarrays
+        """
+        cdef shared_ptr[CSparseTensorCSR] csparse_tensor
+        cdef vector[int64_t] c_shape
+        cdef vector[c_string] c_dim_names
+
+        for x in shape:
+            c_shape.push_back(x)
+        if dim_names is not None:
+            for x in dim_names:
+                c_dim_names.push_back(tobytes(x))
+
+        # Enforce precondition for SparseTensorCSR indices
+        indptr = np.require(indptr, dtype='i8')
+        indices = np.require(indices, dtype='i8')
+        if indptr.ndim != 1:
+            raise ValueError("Expected 1-dimensional array for "
+                             "SparseTensorCSR indptr")
+        if indices.ndim != 1:
+            raise ValueError("Expected 1-dimensional array for "
+                             "SparseTensorCSR indices")
+
+        check_status(NdarraysToSparseTensorCSR(c_default_memory_pool(),
+                     data, indptr, indices, c_shape, c_dim_names,
+                     &csparse_tensor))
+        return pyarrow_wrap_sparse_tensor_csr(csparse_tensor)
+
+    @staticmethod
+    def from_tensor(obj):
+        """
+        Convert arrow::Tensor to arrow::SparseTensorCSR
+        """
+        cdef shared_ptr[CSparseTensorCSR] csparse_tensor
+        cdef shared_ptr[CTensor] ctensor = pyarrow_unwrap_tensor(obj)
+
+        with nogil:
+            check_status(TensorToSparseTensorCSR(ctensor, &csparse_tensor))
+
+        return pyarrow_wrap_sparse_tensor_csr(csparse_tensor)
+
+    def to_numpy(self):
+        """
+        Convert arrow::SparseTensorCSR to numpy.ndarrays with zero copy
+        """
+        cdef PyObject* out_data
+        cdef PyObject* out_indptr
+        cdef PyObject* out_indices
+
+        check_status(SparseTensorCSRToNdarray(self.sp_sparse_tensor, self,
+                     &out_data, &out_indptr, &out_indices))
+        return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
+                PyObject_to_object(out_indices))
+
+    def equals(self, SparseTensorCSR other):
+        """
+        Return true if the sparse tensors contain exactly equal data
+        """
+        return self.stp.Equals(deref(other.stp))
+
+    def __eq__(self, other):
+        if isinstance(other, SparseTensorCSR):
+            return self.equals(other)
+        else:
+            return NotImplemented
+
+    @property
+    def is_mutable(self):
+        return self.stp.is_mutable()
+
+    @property
+    def ndim(self):
+        return self.stp.ndim()
+
+    @property
+    def shape(self):
+        # Cython knows how to convert a vector[T] to a Python list
+        return tuple(self.stp.shape())
+
+    @property
+    def size(self):
+        return self.stp.size()
+
+    def dim_name(self, i):
+        return frombytes(self.stp.dim_name(i))
+
+    @property
+    def dim_names(self):
+        return [frombytes(x) for x in tuple(self.stp.dim_names())]
+
+    @property
+    def non_zero_length(self):
+        return self.stp.non_zero_length()
diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py
index 49075575ce5..fc25fc43977 100644
--- a/python/pyarrow/tests/conftest.py
+++ b/python/pyarrow/tests/conftest.py
@@ -38,7 +38,9 @@
 groups = [
+    'cython',
     'hypothesis',
+    'fastparquet',
     'gandiva',
     'hdfs',
     'large_memory',
'parquet', 'plasma', 's3', - 'tensorflow' + 'tensorflow', + 'flight' ] defaults = { + 'cython': False, + 'fastparquet': False, 'hypothesis': False, 'gandiva': False, 'hdfs': False, @@ -61,9 +66,22 @@ 'parquet': False, 'plasma': False, 's3': False, - 'tensorflow': False + 'tensorflow': False, + 'flight': False, } +try: + import cython # noqa + defaults['cython'] = True +except ImportError: + pass + +try: + import fastparquet # noqa + defaults['fastparquet'] = True +except ImportError: + pass + try: import pyarrow.gandiva # noqa defaults['gandiva'] = True @@ -76,14 +94,12 @@ except ImportError: pass - try: import pandas # noqa defaults['pandas'] = True except ImportError: pass - try: import pyarrow.parquet # noqa defaults['parquet'] = True @@ -91,18 +107,23 @@ pass try: - import pyarrow.plasma as plasma # noqa + import pyarrow.plasma # noqa defaults['plasma'] = True except ImportError: pass - try: import tensorflow # noqa defaults['tensorflow'] = True except ImportError: pass +try: + import pyarrow.flight # noqa + defaults['flight'] = True +except ImportError: + pass + def pytest_configure(config): for mark in groups: diff --git a/python/pyarrow/tests/pyarrow_cython_example.pyx b/python/pyarrow/tests/pyarrow_cython_example.pyx index 4a6f3ca5dea..160c1518b05 100644 --- a/python/pyarrow/tests/pyarrow_cython_example.pyx +++ b/python/pyarrow/tests/pyarrow_cython_example.pyx @@ -22,9 +22,17 @@ from pyarrow.lib cimport * def get_array_length(obj): - # Just an example function accessing both the pyarrow Cython API + # An example function accessing both the pyarrow Cython API # and the Arrow C++ API cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj) if arr.get() == NULL: raise TypeError("not an array") return arr.get().length() + + +def make_null_array(length): + # An example function that returns a PyArrow object without PyArrow + # being imported explicitly at the Python level. + cdef shared_ptr[CArray] null_array + null_array.reset(new CNullArray(length)) + return pyarrow_wrap_array(null_array) diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py index 514c5ad2b62..a2828643cc3 100644 --- a/python/pyarrow/tests/strategies.py +++ b/python/pyarrow/tests/strategies.py @@ -219,13 +219,6 @@ def chunked_arrays(draw, type, min_chunks=0, max_chunks=None, chunk_size=None): return pa.chunked_array(draw(chunks), type=type) -def columns(type, min_chunks=0, max_chunks=None, chunk_size=None): - chunked_array = chunked_arrays(type, chunk_size=chunk_size, - min_chunks=min_chunks, - max_chunks=max_chunks) - return st.builds(pa.column, st.text(), chunked_array) - - @st.composite def record_batches(draw, type, rows=None, max_fields=None): if isinstance(rows, st.SearchStrategy): @@ -258,6 +251,5 @@ def tables(draw, type, rows=None, max_fields=None): all_arrays = arrays(all_types) all_chunked_arrays = chunked_arrays(all_types) -all_columns = columns(all_types) all_record_batches = record_batches(all_types) all_tables = tables(all_types) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 9d66d96e2c2..c14d291374b 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -647,27 +647,26 @@ def test_cast_integers_safe(): def test_cast_none(): # ARROW-3735: Ensure that calling cast(None) doesn't segfault. 
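    # (cast() requires a DataType; passing None should raise TypeError
    # rather than crash the process)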
arr = pa.array([1, 2, 3]) - col = pa.column('foo', [arr]) with pytest.raises(TypeError): arr.cast(None) - with pytest.raises(TypeError): - col.cast(None) - -def test_cast_column(): +def test_cast_chunked_array(): arrays = [pa.array([1, 2, 3]), pa.array([4, 5, 6])] - - col = pa.column('foo', arrays) + carr = pa.chunked_array(arrays) target = pa.float64() - casted = col.cast(target) - - expected = pa.column('foo', [x.cast(target) for x in arrays]) + casted = carr.cast(target) + expected = pa.chunked_array([x.cast(target) for x in arrays]) assert casted.equals(expected) +def test_chunked_array_data_warns(): + with pytest.warns(FutureWarning): + pa.chunked_array([[]]).data + + def test_cast_integers_unsafe(): # We let NumPy do the unsafe casting unsafe_cases = [ @@ -781,8 +780,6 @@ def test_unique_simple(): for arr, expected in cases: result = arr.unique() assert result.equals(expected) - result = pa.column("column", arr).unique() - assert result.equals(expected) result = pa.chunked_array([arr]).unique() assert result.equals(expected) @@ -801,8 +798,6 @@ def test_dictionary_encode_simple(): for arr, expected in cases: result = arr.dictionary_encode() assert result.equals(expected) - result = pa.column("column", arr).dictionary_encode() - assert result.data.chunk(0).equals(expected) result = pa.chunked_array([arr]).dictionary_encode() assert result.chunk(0).equals(expected) @@ -1147,6 +1142,17 @@ def test_array_from_masked(): pa.array(ma, mask=np.array([True, False, False, False])) +def test_array_from_invalid_dim_raises(): + msg = "only handle 1-dimensional arrays" + arr2d = np.array([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(ValueError, match=msg): + pa.array(arr2d) + + arr0d = np.array(0) + with pytest.raises(ValueError, match=msg): + pa.array(arr0d) + + def test_buffers_primitive(): a = pa.array([1, 2, None, 4], type=pa.int16()) buffers = a.buffers() @@ -1479,7 +1485,7 @@ def test_array_masked(): def test_array_from_large_pyints(): # ARROW-5430 - with pytest.raises(pa.ArrowInvalid): + with pytest.raises(OverflowError): # too large for int64 so dtype must be explicitly provided pa.array([int(2 ** 63)]) diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 4e040836979..81d5952b4b1 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -26,9 +26,12 @@ import datetime import decimal import itertools +import traceback +import sys + import numpy as np -import six import pytz +import six int_type_pairs = [ @@ -53,6 +56,19 @@ def __iter__(self): return self.lst.__iter__() +class MyInt: + def __init__(self, value): + self.value = value + + def __int__(self): + return self.value + + +class MyBrokenInt: + def __int__(self): + 1/0 # MARKER + + def check_struct_type(ty, expected): """ Check a struct type is as expected, but not taking order into account. 
@@ -191,7 +207,7 @@ def test_nested_lists(seq): @parametrize_with_iterable_types def test_list_with_non_list(seq): # List types don't accept non-sequences - with pytest.raises(pa.ArrowTypeError): + with pytest.raises(TypeError): pa.array(seq([[], [1, 2], 3]), type=pa.list_(pa.int64())) @@ -299,6 +315,24 @@ def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type): assert arr.to_pylist() == expected +@parametrize_with_iterable_types +def test_sequence_custom_integers(seq): + expected = [0, 42, 2**33 + 1, -2**63] + data = list(map(MyInt, expected)) + arr = pa.array(seq(data), type=pa.int64()) + assert arr.to_pylist() == expected + + +@parametrize_with_iterable_types +def test_broken_integers(seq): + data = [MyBrokenInt()] + with pytest.raises(ZeroDivisionError) as exc_info: + pa.array(seq(data), type=pa.int64()) + # Original traceback is kept + tb_lines = traceback.format_tb(exc_info.tb) + assert "# MARKER" in tb_lines[-1] + + def test_numpy_scalars_mixed_type(): # ARROW-4324 data = [np.int32(10), np.float32(0.5)] @@ -308,7 +342,7 @@ def test_numpy_scalars_mixed_type(): @pytest.mark.xfail(reason="Type inference for uint64 not implemented", - raises=pa.ArrowException) + raises=OverflowError) def test_uint64_max_convert(): data = [0, np.iinfo(np.uint64).max] @@ -323,20 +357,20 @@ def test_uint64_max_convert(): @pytest.mark.parametrize("bits", [8, 16, 32, 64]) def test_signed_integer_overflow(bits): ty = getattr(pa, "int%d" % bits)() - # XXX ideally would raise OverflowError - with pytest.raises((ValueError, pa.ArrowException)): + # XXX ideally would always raise OverflowError + with pytest.raises((OverflowError, pa.ArrowInvalid)): pa.array([2 ** (bits - 1)], ty) - with pytest.raises((ValueError, pa.ArrowException)): + with pytest.raises((OverflowError, pa.ArrowInvalid)): pa.array([-2 ** (bits - 1) - 1], ty) @pytest.mark.parametrize("bits", [8, 16, 32, 64]) def test_unsigned_integer_overflow(bits): ty = getattr(pa, "uint%d" % bits)() - # XXX ideally would raise OverflowError - with pytest.raises((ValueError, pa.ArrowException)): + # XXX ideally would always raise OverflowError + with pytest.raises((OverflowError, pa.ArrowInvalid)): pa.array([2 ** bits], ty) - with pytest.raises((ValueError, pa.ArrowException)): + with pytest.raises((OverflowError, pa.ArrowInvalid)): pa.array([-1], ty) @@ -661,7 +695,7 @@ def test_sequence_explicit_types(input): def test_date32_overflow(): # Overflow data3 = [2**32, None] - with pytest.raises(pa.ArrowException): + with pytest.raises((OverflowError, pa.ArrowException)): pa.array(data3, type=pa.date32()) @@ -831,12 +865,19 @@ def test_sequence_timestamp_from_int_with_unit(): assert repr(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')" assert str(arr_ns[0]) == "1970-01-01 00:00:00.000000001" - with pytest.raises(pa.ArrowException): - class CustomClass(): - pass - pa.array([1, CustomClass()], type=ns) - pa.array([1, CustomClass()], type=pa.date32()) - pa.array([1, CustomClass()], type=pa.date64()) + if sys.version_info >= (3,): + expected_exc = TypeError + else: + # Can have "AttributeError: CustomClass instance + # has no attribute '__trunc__'" + expected_exc = (TypeError, AttributeError) + + class CustomClass(): + pass + + for ty in [ns, pa.date32(), pa.date64()]: + with pytest.raises(expected_exc): + pa.array([1, CustomClass()], type=ty) def test_sequence_nesting_levels(): diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index df4d0a5b55c..9f0c08bd490 100644 --- a/python/pyarrow/tests/test_csv.py +++ 
b/python/pyarrow/tests/test_csv.py @@ -188,7 +188,7 @@ def read_bytes(self, b, **kwargs): def check_names(self, table, names): assert table.num_columns == len(names) - assert [c.name for c in table.columns] == names + assert table.column_names == names def test_file_object(self): data = b"a,b\n1,2\n" @@ -376,7 +376,7 @@ def test_column_types(self): with pytest.raises(pa.ArrowInvalid) as exc: self.read_bytes(rows, convert_options=opts) err = str(exc.value) - assert "In column #1: " in err + assert "In CSV column #1: " in err assert "CSV conversion error to float: invalid value 'XXX'" in err def test_no_ending_newline(self): diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 57dbeb554ca..202868d5c71 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -23,11 +23,12 @@ import pytest import pyarrow as pa - import pyarrow.tests.util as test_util + here = os.path.dirname(os.path.abspath(__file__)) + setup_template = """if 1: from distutils.core import setup from Cython.Build import cythonize @@ -50,6 +51,8 @@ if custom_ld_path: ext.library_dirs.append(custom_ld_path) ext.extra_compile_args.extend(compiler_opts) + print("Extension module:", + ext, ext.include_dirs, ext.libraries, ext.library_dirs) setup( ext_modules=ext_modules, @@ -57,18 +60,15 @@ """ -@pytest.mark.skipif( - 'ARROW_HOME' not in os.environ, - reason='ARROW_HOME environment variable not defined') +@pytest.mark.cython def test_cython_api(tmpdir): """ Basic test for the Cython API. """ - pytest.importorskip('Cython') + # Fail early if cython is not found + import cython # noqa - ld_path_default = os.path.join(os.environ['ARROW_HOME'], 'lib') - - test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', ld_path_default) + test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', '') with tmpdir.as_cwd(): # Set up temporary workspace @@ -106,3 +106,27 @@ def test_cython_api(tmpdir): mod.get_array_length(None) finally: sys.path = orig_path + + # Check the extension module is loadable from a subprocess without + # pyarrow imported first. 
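+    # (pa.get_library_dirs() is prepended to PATH / LD_LIBRARY_PATH below
+    # so the subprocess can resolve the bundled Arrow shared libraries.)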
+ code = """if 1: + import sys + + mod = __import__({mod_name!r}) + arr = mod.make_null_array(5) + assert mod.get_array_length(arr) == 5 + assert arr.null_count == 5 + """.format(mod_path=str(tmpdir), mod_name='pyarrow_cython_example') + + if sys.platform == 'win32': + delim, var = ';', 'PATH' + else: + delim, var = ':', 'LD_LIBRARY_PATH' + + subprocess_env[var] = delim.join( + pa.get_library_dirs() + [subprocess_env.get(var, '')] + ) + + subprocess.check_call([sys.executable, '-c', code], + stdout=subprocess.PIPE, + env=subprocess_env) diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index f26f7ca95b6..137dfeaeaa9 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -69,7 +69,8 @@ def _get_null_counts(self, path, columns=None): counts = [] for i in range(reader.num_columns): col = reader.get_column(i) - if columns is None or col.name in columns: + name = reader.get_column_name(i) + if columns is None or name in columns: counts.append(col.null_count) return counts diff --git a/python/pyarrow/tests/test_flight.py b/python/pyarrow/tests/test_flight.py index a8973ab7343..89b9a956ee3 100644 --- a/python/pyarrow/tests/test_flight.py +++ b/python/pyarrow/tests/test_flight.py @@ -32,7 +32,25 @@ from pyarrow.compat import tobytes from pyarrow.util import pathlib -flight = pytest.importorskip("pyarrow.flight") +try: + from pyarrow import flight + from pyarrow.flight import ( + FlightServerBase, ServerAuthHandler, ClientAuthHandler + ) +except ImportError: + flight = None + FlightServerBase = object + ServerAuthHandler, ClientAuthHandler = object, object + + +# Marks all of the tests in this module +# Ignore these with pytest ... -m 'not flight' +pytestmark = pytest.mark.flight + + +def test_import(): + # So we see the ImportError somewhere + import pyarrow.flight # noqa def resource_root(): @@ -92,7 +110,7 @@ def simple_dicts_table(): return pa.Table.from_arrays(data, names=['some_dicts']) -class ConstantFlightServer(flight.FlightServerBase): +class ConstantFlightServer(FlightServerBase): """A Flight server that always returns the same data. 
See ARROW-4796: this server implementation will segfault if Flight @@ -114,7 +132,7 @@ def do_get(self, context, ticket): return flight.RecordBatchStream(table) -class MetadataFlightServer(flight.FlightServerBase): +class MetadataFlightServer(FlightServerBase): """A Flight server that numbers incoming/outgoing data.""" def do_get(self, context, ticket): @@ -151,7 +169,7 @@ def number_batches(table): yield batch, buf -class EchoFlightServer(flight.FlightServerBase): +class EchoFlightServer(FlightServerBase): """A Flight server that returns the last data uploaded.""" def __init__(self, expected_schema=None): @@ -185,7 +203,7 @@ def do_action(self, context, action): raise NotImplementedError -class GetInfoFlightServer(flight.FlightServerBase): +class GetInfoFlightServer(FlightServerBase): """A Flight server that tests GetFlightInfo.""" def get_flight_info(self, context, descriptor): @@ -204,7 +222,7 @@ def get_flight_info(self, context, descriptor): ) -class CheckTicketFlightServer(flight.FlightServerBase): +class CheckTicketFlightServer(FlightServerBase): """A Flight server that compares the given ticket to an expected value.""" def __init__(self, expected_ticket): @@ -221,7 +239,7 @@ def do_put(self, context, descriptor, reader): self.last_message = reader.read_all() -class InvalidStreamFlightServer(flight.FlightServerBase): +class InvalidStreamFlightServer(FlightServerBase): """A Flight server that tries to return messages with differing schemas.""" schema = pa.schema([('a', pa.int32())]) @@ -237,7 +255,7 @@ def do_get(self, context, ticket): return flight.GeneratorStream(self.schema, [table1, table2]) -class SlowFlightServer(flight.FlightServerBase): +class SlowFlightServer(FlightServerBase): """A Flight server that delays its responses to test timeouts.""" def do_get(self, context, ticket): @@ -258,11 +276,11 @@ def slow_stream(): yield pa.Table.from_arrays(data1, names=['a']) -class HttpBasicServerAuthHandler(flight.ServerAuthHandler): +class HttpBasicServerAuthHandler(ServerAuthHandler): """An example implementation of HTTP basic authentication.""" def __init__(self, creds): - super().__init__() + super(HttpBasicServerAuthHandler, self).__init__() self.creds = creds def authenticate(self, outgoing, incoming): @@ -280,11 +298,11 @@ def is_valid(self, token): return username -class HttpBasicClientAuthHandler(flight.ClientAuthHandler): +class HttpBasicClientAuthHandler(ClientAuthHandler): """An example implementation of HTTP basic authentication.""" def __init__(self, username, password): - super().__init__() + super(HttpBasicClientAuthHandler, self).__init__() self.username = tobytes(username) self.password = tobytes(password) @@ -295,11 +313,11 @@ def get_token(self): return base64.b64encode(self.username + b':' + self.password) -class TokenServerAuthHandler(flight.ServerAuthHandler): +class TokenServerAuthHandler(ServerAuthHandler): """An example implementation of authentication via handshake.""" def __init__(self, creds): - super().__init__() + super(TokenServerAuthHandler, self).__init__() self.creds = creds def authenticate(self, outgoing, incoming): @@ -317,11 +335,11 @@ def is_valid(self, token): return token[7:] -class TokenClientAuthHandler(flight.ClientAuthHandler): +class TokenClientAuthHandler(ClientAuthHandler): """An example implementation of authentication via handshake.""" def __init__(self, username, password): - super().__init__() + super(TokenClientAuthHandler, self).__init__() self.username = username self.password = password self.token = b'' @@ -360,13 +378,14 @@ def 
flight_server(server_base, *args, **kwargs): ctor_kwargs = kwargs server_instance = server_base(*args, **ctor_kwargs) + # The server instance needs to be initialized before shutdown() + # can be called + server_instance.init(location, + auth_handler=auth_handler, + tls_certificates=tls_certificates) def _server_thread(): - server_instance.run( - location, - auth_handler=auth_handler, - tls_certificates=tls_certificates, - ) + server_instance.run() thread = threading.Thread(target=_server_thread, daemon=True) thread.start() @@ -614,7 +633,7 @@ def test_location_invalid(): server = ConstantFlightServer() with pytest.raises(pa.ArrowInvalid, match=".*Cannot parse URI:.*"): - server.run("%") + server.init("%") @pytest.mark.slow diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py index f7c316a8baf..8871d69cea1 100644 --- a/python/pyarrow/tests/test_misc.py +++ b/python/pyarrow/tests/test_misc.py @@ -45,7 +45,6 @@ def test_cpu_count(): @pytest.mark.parametrize('klass', [ pa.Field, pa.Schema, - pa.Column, pa.ChunkedArray, pa.RecordBatch, pa.Table, diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index 1854898b372..931c2b10caa 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -147,7 +147,7 @@ class TestConvertMetadata(object): def test_non_string_columns(self): df = pd.DataFrame({0: [1, 2, 3]}) table = pa.Table.from_pandas(df) - assert table.column(0).name == '0' + assert table.field(0).name == '0' def test_from_pandas_with_columns(self): df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]}, @@ -1034,7 +1034,7 @@ class MyDate(date): def test_datetime64_to_date32(self): # ARROW-1718 arr = pa.array([date(2017, 10, 23), None]) - c = pa.Column.from_array("d", arr) + c = pa.chunked_array([arr]) s = c.to_pandas() arr2 = pa.Array.from_pandas(s, type=pa.date32()) @@ -1090,8 +1090,7 @@ def test_array_types_date_as_object(self): objects = [ # The second value is the expected value for date_as_object=False (pa.array(data), expected), - (pa.chunked_array([data]), expected), - (pa.column('date', [data]), expected.astype('M8[ns]'))] + (pa.chunked_array([data]), expected)] assert objects[0][0].equals(pa.array(expected)) @@ -2491,8 +2490,7 @@ def test_to_pandas_deduplicate_strings_array_types(): for arr in [pa.array(values, type=pa.binary()), pa.array(values, type=pa.utf8()), - pa.chunked_array([values, values]), - pa.column('foo', [values, values])]: + pa.chunked_array([values, values])]: _assert_nunique(arr.to_pandas(), nunique) _assert_nunique(arr.to_pandas(deduplicate_objects=False), len(arr)) diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index 436d5bfc515..ca3fbc4c816 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -118,12 +118,13 @@ def test_single_pylist_column_roundtrip(tempdir, dtype): table = pa.Table.from_arrays(data, names=['a']) _write_table(table, filename) table_read = _read_table(filename) - for col_written, col_read in zip(table.itercolumns(), - table_read.itercolumns()): - assert col_written.name == col_read.name - assert col_read.data.num_chunks == 1 - data_written = col_written.data.chunk(0) - data_read = col_read.data.chunk(0) + for i in range(table.num_columns): + col_written = table[i] + col_read = table_read[i] + assert table.field(i).name == table_read.field(i).name + assert col_read.num_chunks == 1 + data_written = col_written.chunk(0) + data_read = col_read.chunk(0) assert 
data_written.equals(data_read) @@ -658,11 +659,23 @@ def test_parquet_metadata_api(): assert isinstance(col_meta, pq.ColumnChunkMetaData) repr(col_meta) + with pytest.raises(IndexError): + meta.row_group(-1) + + with pytest.raises(IndexError): + meta.row_group(meta.num_row_groups + 1) + rg_meta = meta.row_group(0) assert rg_meta.num_rows == len(df) assert rg_meta.num_columns == ncols + 1 # +1 for index assert rg_meta.total_byte_size > 0 + with pytest.raises(IndexError): + col_meta = rg_meta.column(-1) + + with pytest.raises(IndexError): + col_meta = rg_meta.column(ncols + 2) + col_meta = rg_meta.column(0) assert col_meta.file_offset > 0 assert col_meta.file_path == '' # created from BytesIO @@ -1953,12 +1966,14 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): assert result3.equals(expected) # Read column subset - to_read = [result[0], result[2], result[6], result[result.num_columns - 1]] + to_read = [0, 2, 6, result.num_columns - 1] - result = pa.localfs.read_parquet( - dirpath, columns=[c.name for c in to_read]) - expected = pa.Table.from_arrays(to_read, metadata=result.schema.metadata) - assert result.equals(expected) + col_names = [result.field(i).name for i in to_read] + out = pa.localfs.read_parquet(dirpath, columns=col_names) + expected = pa.Table.from_arrays([result.column(i) for i in to_read], + names=col_names, + metadata=result.schema.metadata) + assert out.equals(expected) # Read with multiple threads pa.localfs.read_parquet(dirpath, use_threads=True) @@ -2965,4 +2980,4 @@ def test_filter_before_validate_schema(tempdir): # read single file using filter table = pq.read_table(tempdir, filters=[[('A', '==', 0)]]) - assert table.column('B').equals(pa.column('B', pa.array([1, 2, 3]))) + assert table.column('B').equals(pa.chunked_array([[1, 2, 3]])) diff --git a/python/pyarrow/tests/test_plasma.py b/python/pyarrow/tests/test_plasma.py index 149bdd54f6c..5381c26f6fc 100644 --- a/python/pyarrow/tests/test_plasma.py +++ b/python/pyarrow/tests/test_plasma.py @@ -227,7 +227,7 @@ def test_create_and_seal(self): # Make sure that creating the same object twice raises an exception. object_id = random_object_id() self.plasma_client.create_and_seal(object_id, b'a', b'b') - with pytest.raises(pa.PlasmaObjectExists): + with pytest.raises(pa.plasma.PlasmaObjectExists): self.plasma_client.create_and_seal(object_id, b'a', b'b') # Make sure that these objects can be evicted. @@ -852,9 +852,13 @@ def test_use_full_memory(self): for _ in range(2): create_object(self.plasma_client2, DEFAULT_PLASMA_STORE_MEMORY, 0) # Verify that an object that is too large does not fit. - with pytest.raises(pa.lib.PlasmaStoreFull): - create_object(self.plasma_client2, - DEFAULT_PLASMA_STORE_MEMORY + SMALL_OBJECT_SIZE, 0) + # Also verifies that the right error is thrown, and does not + # create the object ID prematurely. 
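A standalone sketch of the guarantee this plasma test pins down, assuming a `plasma_store` process is already listening on a hypothetical `/tmp/plasma` socket (both the socket path and the oversized byte count are illustrative): an oversized `create()` must raise `PlasmaStoreFull` without registering the object ID, so retrying with the same ID fails the same way rather than with `PlasmaObjectExists`.

```python
import pyarrow.plasma as plasma

# Hypothetical socket path; a plasma_store must already be serving it.
client = plasma.connect("/tmp/plasma")
object_id = plasma.ObjectID(b"\x00" * 20)  # object IDs are 20 bytes

for _ in range(3):
    try:
        # Request far more memory than the store holds.
        client.create(object_id, 10 ** 12)
    except plasma.PlasmaStoreFull:
        # The ID was never created, so each retry raises
        # PlasmaStoreFull again instead of PlasmaObjectExists.
        pass
```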
+ object_id = random_object_id() + for i in range(3): + with pytest.raises(pa.plasma.PlasmaStoreFull): + self.plasma_client2.create( + object_id, DEFAULT_PLASMA_STORE_MEMORY + SMALL_OBJECT_SIZE) def test_client_death_during_get(self): import pyarrow.plasma as plasma diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py index fadb901977c..80c91bdfd49 100644 --- a/python/pyarrow/tests/test_schema.py +++ b/python/pyarrow/tests/test_schema.py @@ -371,6 +371,15 @@ def test_schema_equals_propagates_check_metadata(): assert schema1.equals(schema2, check_metadata=False) +def test_schema_equals_invalid_type(): + # ARROW-5873 + schema = pa.schema([pa.field("a", pa.int64())]) + + for val in [None, 'string', pa.array([1, 2])]: + with pytest.raises(TypeError): + schema.equals(val) + + def test_schema_equality_operators(): fields = [ pa.field('foo', pa.int32()), diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index 22983e77e99..6c626992dce 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -302,6 +302,14 @@ def custom_deserializer(serialized_obj): assert deserialized == (0, 'a') +def test_primitive_serialization_notbroken(large_buffer): + serialization_roundtrip({(1, 2): 2}, large_buffer) + + +def test_primitive_serialization_broken(large_buffer): + serialization_roundtrip({(): 2}, large_buffer) + + def test_primitive_serialization(large_buffer): for obj in PRIMITIVE_OBJECTS: serialization_roundtrip(obj, large_buffer) diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py new file mode 100644 index 00000000000..68564dacf4b --- /dev/null +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -0,0 +1,221 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest +import sys + +import numpy as np +import pyarrow as pa + + +tensor_type_pairs = [ + ('i1', pa.int8()), + ('i2', pa.int16()), + ('i4', pa.int32()), + ('i8', pa.int64()), + ('u1', pa.uint8()), + ('u2', pa.uint16()), + ('u4', pa.uint32()), + ('u8', pa.uint64()), + ('f2', pa.float16()), + ('f4', pa.float32()), + ('f8', pa.float64()) +] + + +@pytest.mark.parametrize('sparse_tensor_type', [ + pa.SparseTensorCSR, + pa.SparseTensorCOO, +]) +def test_sparse_tensor_attrs(sparse_tensor_type): + data = np.array([ + [0, 1, 0, 0, 1], + [0, 0, 0, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 0], + [0, 3, 0, 0, 0], + ]) + dim_names = ['x', 'y'] + sparse_tensor = sparse_tensor_type.from_dense_numpy(data, dim_names) + + assert sparse_tensor.ndim == 2 + assert sparse_tensor.size == 25 + assert sparse_tensor.shape == data.shape + assert sparse_tensor.is_mutable + assert sparse_tensor.dim_name(0) == dim_names[0] + assert sparse_tensor.dim_names == dim_names + assert sparse_tensor.non_zero_length == 4 + + +def test_sparse_tensor_coo_base_object(): + data = np.array([[4], [9], [7], [5]]) + coords = np.array([[0, 0], [0, 2], [1, 1], [3, 3]]) + array = np.array([[4, 0, 9, 0], + [0, 7, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 5]]) + sparse_tensor = pa.SparseTensorCOO.from_dense_numpy(array) + n = sys.getrefcount(sparse_tensor) + result_data, result_coords = sparse_tensor.to_numpy() + assert sys.getrefcount(sparse_tensor) == n + 2 + + sparse_tensor = None + assert np.array_equal(data, result_data) + assert np.array_equal(coords, result_coords) + assert result_coords.flags.f_contiguous # column-major + + +def test_sparse_tensor_csr_base_object(): + data = np.array([[1], [2], [3], [4], [5], [6]]) + indptr = np.array([0, 2, 3, 6]) + indices = np.array([0, 2, 2, 0, 1, 2]) + array = np.array([[1, 0, 2], + [0, 0, 3], + [4, 5, 6]]) + + sparse_tensor = pa.SparseTensorCSR.from_dense_numpy(array) + n = sys.getrefcount(sparse_tensor) + result_data, result_indptr, result_indices = sparse_tensor.to_numpy() + assert sys.getrefcount(sparse_tensor) == n + 3 + + sparse_tensor = None + assert np.array_equal(data, result_data) + assert np.array_equal(indptr, result_indptr) + assert np.array_equal(indices, result_indices) + + +@pytest.mark.parametrize('sparse_tensor_type', [ + pa.SparseTensorCSR, + pa.SparseTensorCOO, +]) +def test_sparse_tensor_equals(sparse_tensor_type): + def eq(a, b): + assert a.equals(b) + assert a == b + assert not (a != b) + + def ne(a, b): + assert not a.equals(b) + assert not (a == b) + assert a != b + + data = np.random.randn(10, 6)[::, ::2] + sparse_tensor1 = sparse_tensor_type.from_dense_numpy(data) + sparse_tensor2 = sparse_tensor_type.from_dense_numpy( + np.ascontiguousarray(data)) + eq(sparse_tensor1, sparse_tensor2) + data = data.copy() + data[9, 0] = 1.0 + sparse_tensor2 = sparse_tensor_type.from_dense_numpy( + np.ascontiguousarray(data)) + ne(sparse_tensor1, sparse_tensor2) + + +@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) +def test_sparse_tensor_coo_from_dense(dtype_str, arrow_type): + dtype = np.dtype(dtype_str) + data = np.array([[4], [9], [7], [5]]).astype(dtype) + coords = np.array([[0, 0], [0, 2], [1, 1], [3, 3]]) + array = np.array([[4, 0, 9, 0], + [0, 7, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 5]]).astype(dtype) + tensor = pa.Tensor.from_numpy(array) + + # Test from numpy array + sparse_tensor = pa.SparseTensorCOO.from_dense_numpy(array) + repr(sparse_tensor) + assert sparse_tensor.type == arrow_type + result_data, result_coords = sparse_tensor.to_numpy() + assert 
np.array_equal(data, result_data) + assert np.array_equal(coords, result_coords) + + # Test from Tensor + sparse_tensor = pa.SparseTensorCOO.from_tensor(tensor) + repr(sparse_tensor) + assert sparse_tensor.type == arrow_type + result_data, result_coords = sparse_tensor.to_numpy() + assert np.array_equal(data, result_data) + assert np.array_equal(coords, result_coords) + + +@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) +def test_sparse_tensor_csr_from_dense(dtype_str, arrow_type): + dtype = np.dtype(dtype_str) + dense_data = np.array([[1, 0, 2], + [0, 0, 3], + [4, 5, 6]]).astype(dtype) + + data = np.array([[1], [2], [3], [4], [5], [6]]) + indptr = np.array([0, 2, 3, 6]) + indices = np.array([0, 2, 2, 0, 1, 2]) + tensor = pa.Tensor.from_numpy(dense_data) + + # Test from numpy array + sparse_tensor = pa.SparseTensorCSR.from_dense_numpy(dense_data) + repr(sparse_tensor) + result_data, result_indptr, result_indices = sparse_tensor.to_numpy() + assert np.array_equal(data, result_data) + assert np.array_equal(indptr, result_indptr) + assert np.array_equal(indices, result_indices) + + # Test from Tensor + sparse_tensor = pa.SparseTensorCSR.from_tensor(tensor) + repr(sparse_tensor) + assert sparse_tensor.type == arrow_type + result_data, result_indptr, result_indices = sparse_tensor.to_numpy() + assert np.array_equal(data, result_data) + assert np.array_equal(indptr, result_indptr) + assert np.array_equal(indices, result_indices) + + +@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) +def test_sparse_tensor_coo_numpy_roundtrip(dtype_str, arrow_type): + dtype = np.dtype(dtype_str) + data = np.array([[4], [9], [7], [5]]).astype(dtype) + coords = np.array([[0, 0], [3, 3], [1, 1], [0, 2]]) + shape = (4, 4) + dim_names = ["x", "y"] + + sparse_tensor = pa.SparseTensorCOO.from_numpy(data, coords, shape, + dim_names) + repr(sparse_tensor) + assert sparse_tensor.type == arrow_type + result_data, result_coords = sparse_tensor.to_numpy() + assert np.array_equal(data, result_data) + assert np.array_equal(coords, result_coords) + assert sparse_tensor.dim_names == dim_names + + +@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) +def test_sparse_tensor_csr_numpy_roundtrip(dtype_str, arrow_type): + dtype = np.dtype(dtype_str) + data = np.array([[1], [2], [3], [4], [5], [6]]).astype(dtype) + indptr = np.array([0, 2, 3, 6]) + indices = np.array([0, 2, 2, 0, 1, 2]) + shape = (3, 3) + dim_names = ["x", "y"] + + sparse_tensor = pa.SparseTensorCSR.from_numpy(data, indptr, indices, + shape, dim_names) + repr(sparse_tensor) + assert sparse_tensor.type == arrow_type + result_data, result_indptr, result_indices = sparse_tensor.to_numpy() + assert np.array_equal(data, result_data) + assert np.array_equal(indptr, result_indptr) + assert np.array_equal(indices, result_indices) + assert sparse_tensor.dim_names == dim_names diff --git a/python/pyarrow/tests/test_strategies.py b/python/pyarrow/tests/test_strategies.py index f1227d26e36..f4249df2462 100644 --- a/python/pyarrow/tests/test_strategies.py +++ b/python/pyarrow/tests/test_strategies.py @@ -46,11 +46,6 @@ def test_chunked_arrays(chunked_array): assert isinstance(chunked_array, pa.lib.ChunkedArray) -@h.given(past.all_columns) -def test_columns(column): - assert isinstance(column, pa.lib.Column) - - @h.given(past.all_record_batches) def test_record_batches(record_bath): assert isinstance(record_bath, pa.lib.RecordBatch) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 
c7216ea738f..0645fcbe180 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -186,9 +187,9 @@ def test_chunked_array_to_pandas(): pa.array([-10, -5, 0, 5, 10]) ] table = pa.Table.from_arrays(data, names=['a']) - chunked_arr = table.column(0).data - assert isinstance(chunked_arr, pa.ChunkedArray) - array = chunked_arr.to_pandas() + col = table.column(0) + assert isinstance(col, pa.ChunkedArray) + array = col.to_pandas() assert array.shape == (5,) assert array[0] == -10 @@ -223,124 +224,22 @@ def test_chunked_array_asarray(): assert np_arr.dtype == np.dtype('float64') -def test_column_basics(): - data = [ - pa.array([-10, -5, 0, 5, 10]) - ] - table = pa.Table.from_arrays(data, names=['a']) - column = table.column(0) - assert column.name == 'a' - assert column.length() == 5 - assert len(column) == 5 - assert column.shape == (5,) - assert column.to_pylist() == [-10, -5, 0, 5, 10] - assert column == pa.Column.from_array("a", column.data) - assert column != pa.Column.from_array("b", column.data) - assert column != column.data - assert not column.equals(None) - - -def test_column_factory_function(): - # ARROW-1575 - arr = pa.array([0, 1, 2, 3, 4]) - arr2 = pa.array([5, 6, 7, 8]) - - col1 = pa.Column.from_array('foo', arr) - col2 = pa.Column.from_array(pa.field('foo', arr.type), arr) - - assert col1.equals(col2) - - col3 = pa.column('foo', [arr, arr2]) - chunked_arr = pa.chunked_array([arr, arr2]) - col4 = pa.column('foo', chunked_arr) - assert col3.equals(col4) - - col5 = pa.column('foo', arr.to_pandas()) - assert col5.equals(pa.column('foo', arr)) - - # Type mismatch - with pytest.raises(ValueError): - pa.Column.from_array(pa.field('foo', pa.string()), arr) - - -def test_column_pickle(): - arr = pa.chunked_array([[1, 2], [5, 6, 7]], type=pa.int16()) - field = pa.field("ints", pa.int16()).add_metadata({b"foo": b"bar"}) - col = pa.column(field, arr) - - result = pickle.loads(pickle.dumps(col)) - assert result.equals(col) - assert result.data.num_chunks == 2 - assert result.field == field - - -@pytest.mark.pandas -def test_column_to_pandas(): - data = [ - pa.array([-10, -5, 0, 5, 10]) - ] - table = pa.Table.from_arrays(data, names=['a']) - column = table.column(0) - series = column.to_pandas() - assert series.name == 'a' - assert series.shape == (5,) - assert series.iloc[0] == -10 - - -def test_column_asarray(): - data = [ - pa.array([-10, -5, 0, 5, 10]) - ] - table = pa.Table.from_arrays(data, names=['a']) - column = table.column(0) - - np_arr = np.asarray(column) - assert np_arr.tolist() == [-10, -5, 0, 5, 10] - assert np_arr.dtype == np.dtype('int64') - - # An optional type can be specified when calling np.asarray - np_arr = np.asarray(column, dtype='str') - assert np_arr.tolist() == ['-10', '-5', '0', '5', '10'] - - -def test_column_flatten(): +def test_chunked_array_flatten(): ty = pa.struct([pa.field('x', pa.int16()), pa.field('y', pa.float32())]) a = pa.array([(1, 2.5), (3, 4.5), (5, 6.5)], type=ty) - col = pa.Column.from_array('foo', a) - x, y = col.flatten() - assert x == pa.column('foo.x', pa.array([1, 3, 5], type=pa.int16())) - assert y == pa.column('foo.y', pa.array([2.5, 4.5, 6.5], - type=pa.float32())) + carr = pa.chunked_array(a) + x, y = carr.flatten() + assert x.equals(pa.chunked_array(pa.array([1, 3, 5], type=pa.int16()))) + 
assert y.equals(pa.chunked_array(pa.array([2.5, 4.5, 6.5], + type=pa.float32()))) + # Empty column a = pa.array([], type=ty) - col = pa.Column.from_array('foo', a) - x, y = col.flatten() - assert x == pa.column('foo.x', pa.array([], type=pa.int16())) - assert y == pa.column('foo.y', pa.array([], type=pa.float32())) - - -def test_column_getitem(): - arr = pa.array([1, 2, 3, 4, 5, 6]) - col = pa.column('ints', arr) - - assert col[1].as_py() == 2 - assert col[-1].as_py() == 6 - assert col[-6].as_py() == 1 - with pytest.raises(IndexError): - col[6] - with pytest.raises(IndexError): - col[-7] - - data_slice = col[2:4] - assert data_slice.to_pylist() == [3, 4] - - data_slice = col[4:-1] - assert data_slice.to_pylist() == [5] - - data_slice = col[99:99] - assert data_slice.type == col.type - assert data_slice.to_pylist() == [] + carr = pa.chunked_array(a) + x, y = carr.flatten() + assert x.equals(pa.chunked_array(pa.array([], type=pa.int16()))) + assert y.equals(pa.chunked_array(pa.array([], type=pa.float32()))) def test_recordbatch_basics(): @@ -481,7 +380,7 @@ def test_recordbatchlist_schema_equals(): def test_table_equals(): - table = pa.Table.from_arrays([]) + table = pa.Table.from_arrays([], names=[]) assert table.equals(table) # ARROW-4822 @@ -497,8 +396,8 @@ def test_table_from_batches_and_schema(): names=['a', 'b']) table = pa.Table.from_batches([batch], schema) assert table.schema.equals(schema) - assert table.column(0) == pa.column('a', pa.array([1])) - assert table.column(1) == pa.column('b', pa.array([3.14])) + assert table.column(0) == pa.chunked_array([[1]]) + assert table.column(1) == pa.chunked_array([[3.14]]) incompatible_schema = pa.schema([pa.field('a', pa.int64())]) with pytest.raises(pa.ArrowInvalid): @@ -565,18 +464,19 @@ def test_table_basics(): columns = [] for col in table.itercolumns(): columns.append(col) - for chunk in col.data.iterchunks(): + for chunk in col.iterchunks(): assert chunk is not None with pytest.raises(IndexError): - col.data.chunk(-1) + col.chunk(-1) with pytest.raises(IndexError): - col.data.chunk(col.data.num_chunks) + col.chunk(col.num_chunks) assert table.columns == columns - assert table == pa.Table.from_arrays(columns) - assert table != pa.Table.from_arrays(columns[1:]) + assert table == pa.Table.from_arrays(columns, names=table.column_names) + assert table != pa.Table.from_arrays(columns[1:], + names=table.column_names[1:]) assert table != columns @@ -586,13 +486,10 @@ def test_table_from_arrays_preserves_column_metadata(): arr1 = pa.array([3, 4]) field0 = pa.field('field1', pa.int64(), metadata=dict(a="A", b="B")) field1 = pa.field('field2', pa.int64(), nullable=False) - columns = [ - pa.column(field0, arr0), - pa.column(field1, arr1) - ] - table = pa.Table.from_arrays(columns) - assert b"a" in table.column(0).field.metadata - assert table.column(1).field.nullable is False + table = pa.Table.from_arrays([arr0, arr1], + schema=pa.schema([field0, field1])) + assert b"a" in table.field(0).metadata + assert table.field(1).nullable is False def test_table_from_arrays_invalid_names(): @@ -664,16 +561,16 @@ def test_table_add_column(): ] table = pa.Table.from_arrays(data, names=('a', 'b', 'c')) - col = pa.Column.from_array('d', data[1]) - t2 = table.add_column(3, col) - t3 = table.append_column(col) + new_field = pa.field('d', data[1].type) + t2 = table.add_column(3, new_field, data[1]) + t3 = table.append_column(new_field, data[1]) expected = pa.Table.from_arrays(data + [data[1]], names=('a', 'b', 'c', 'd')) assert t2.equals(expected) assert 
t3.equals(expected) - t4 = table.add_column(0, col) + t4 = table.add_column(0, new_field, data[1]) expected = pa.Table.from_arrays([data[1]] + data, names=('d', 'a', 'b', 'c')) assert t4.equals(expected) @@ -687,8 +584,8 @@ def test_table_set_column(): ] table = pa.Table.from_arrays(data, names=('a', 'b', 'c')) - col = pa.Column.from_array('d', data[1]) - t2 = table.set_column(0, col) + new_field = pa.field('d', data[1].type) + t2 = table.set_column(0, new_field, data[1]) expected_data = list(data) expected_data[0] = data[1] @@ -739,7 +636,7 @@ def test_table_remove_column_empty(): t2._validate() assert len(t2) == len(table) - t3 = t2.add_column(0, table[0]) + t3 = t2.add_column(0, table.field(0), table[0]) t3._validate() assert t3.equals(table) @@ -791,7 +688,7 @@ def test_table_combine_chunks(): combined._validate() assert combined.equals(table) for c in combined.columns: - assert c.data.num_chunks == 1 + assert c.num_chunks == 1 def test_concat_tables(): @@ -1011,7 +908,12 @@ def test_table_from_pydict(): def test_table_factory_function(): import pandas as pd - d = {'a': [1, 2, 3], 'b': ['a', 'b', 'c']} + # Put in wrong order to make sure that lines up with schema + d = OrderedDict([('b', ['a', 'b', 'c']), ('a', [1, 2, 3])]) + + d_explicit = {'b': pa.array(['a', 'b', 'c'], type='string'), + 'a': pa.array([1, 2, 3], type='int32')} + schema = pa.schema([('a', pa.int32()), ('b', pa.string())]) df = pd.DataFrame(d) @@ -1022,9 +924,25 @@ def test_table_factory_function(): table2 = pa.Table.from_pandas(df, schema=schema) assert table1.equals(table2) - table1 = pa.table(d) - table2 = pa.Table.from_pydict(d) + table1 = pa.table(d_explicit) + table2 = pa.Table.from_pydict(d_explicit) assert table1.equals(table2) + + # schema coerces type table1 = pa.table(d, schema=schema) table2 = pa.Table.from_pydict(d, schema=schema) assert table1.equals(table2) + + +def test_table_function_unicode_schema(): + col_a = "äääh" + col_b = "öööf" + + # Put in wrong order to make sure that lines up with schema + d = OrderedDict([(col_b, ['a', 'b', 'c']), (col_a, [1, 2, 3])]) + + schema = pa.schema([(col_a, pa.int32()), (col_b, pa.string())]) + + result = pa.table(d, schema=schema) + assert result[0].chunk(0).equals(pa.array([1, 2, 3], type='int32')) + assert result[1].chunk(0).equals(pa.array(['a', 'b', 'c'], type='string')) diff --git a/python/pyarrow/tests/test_tensor.py b/python/pyarrow/tests/test_tensor.py index 188a4a5e1a5..13f05d27489 100644 --- a/python/pyarrow/tests/test_tensor.py +++ b/python/pyarrow/tests/test_tensor.py @@ -23,12 +23,28 @@ import pyarrow as pa +tensor_type_pairs = [ + ('i1', pa.int8()), + ('i2', pa.int16()), + ('i4', pa.int32()), + ('i8', pa.int64()), + ('u1', pa.uint8()), + ('u2', pa.uint16()), + ('u4', pa.uint32()), + ('u8', pa.uint64()), + ('f2', pa.float16()), + ('f4', pa.float32()), + ('f8', pa.float64()) +] + + def test_tensor_attrs(): data = np.random.randn(10, 4) tensor = pa.Tensor.from_numpy(data) assert tensor.ndim == 2 + assert tensor.dim_names == [] assert tensor.size == 40 assert tensor.shape == data.shape assert tensor.strides == data.strides @@ -42,6 +58,13 @@ def test_tensor_attrs(): tensor = pa.Tensor.from_numpy(data2) assert not tensor.is_mutable + # With dim_names + tensor = pa.Tensor.from_numpy(data, dim_names=('x', 'y')) + assert tensor.ndim == 2 + assert tensor.dim_names == ['x', 'y'] + assert tensor.dim_name(0) == 'x' + assert tensor.dim_name(1) == 'y' + def test_tensor_base_object(): tensor = pa.Tensor.from_numpy(np.random.randn(10, 4)) @@ -50,19 +73,7 @@ def 
test_tensor_base_object(): assert sys.getrefcount(tensor) == n + 1 -@pytest.mark.parametrize('dtype_str,arrow_type', [ - ('i1', pa.int8()), - ('i2', pa.int16()), - ('i4', pa.int32()), - ('i8', pa.int64()), - ('u1', pa.uint8()), - ('u2', pa.uint16()), - ('u4', pa.uint32()), - ('u8', pa.uint64()), - ('f2', pa.float16()), - ('f4', pa.float32()), - ('f8', pa.float64()) -]) +@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_tensor_numpy_roundtrip(dtype_str, arrow_type): dtype = np.dtype(dtype_str) data = (100 * np.random.randn(10, 4)).astype(dtype) @@ -76,15 +87,6 @@ def test_tensor_numpy_roundtrip(dtype_str, arrow_type): assert (data == result).all() -def _try_delete(path): - import gc - gc.collect() - try: - os.remove(path) - except os.error: - pass - - def test_tensor_ipc_roundtrip(tmpdir): data = np.random.randn(10, 4) tensor = pa.Tensor.from_numpy(data) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 79e9713d76a..657df04f965 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import atexit import re import warnings @@ -196,7 +197,9 @@ cdef class DictionaryMemo: """ Tracking container for dictionary-encoded fields """ - pass + def __cinit__(self): + self.sp_memo.reset(new CDictionaryMemo()) + self.memo = self.sp_memo.get() cdef class DictionaryType(DataType): @@ -589,32 +592,6 @@ cdef class UnknownExtensionType(ExtensionType): return self.serialized -cdef class _ExtensionTypesInitializer: - # - # A private object that handles process-wide registration of the Python - # ExtensionType. - # - - def __cinit__(self): - cdef: - DataType storage_type - shared_ptr[CExtensionType] cpy_ext_type - - # Make a dummy C++ ExtensionType - storage_type = null() - check_status(CPyExtensionType.FromClass(storage_type.sp_type, - ExtensionType, &cpy_ext_type)) - check_status( - RegisterPyExtensionType( cpy_ext_type)) - - def __dealloc__(self): - # This needs to be done explicitly before the Python interpreter is - # finalized. If the C++ type is destroyed later in the process - # teardown stage, it will invoke CPython APIs such as Py_DECREF - # with a destroyed interpreter. - check_status(UnregisterPyExtensionType()) - - cdef class Field: """ A named field, with a data type, nullability, and optional metadata. 
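The `types.pxi` changes that follow replace the removed `_ExtensionTypesInitializer` (whose `__dealloc__` handled unregistration) with plain module-level functions wired to `atexit`; the shape of that pattern, as a minimal sketch in ordinary Python with illustrative names:

```python
import atexit


def _register():
    # Process-wide setup, e.g. registering a Python class with a
    # C++-side type registry.
    pass


def _unregister():
    # atexit callbacks run before interpreter finalization, so this
    # teardown never invokes CPython APIs (such as Py_DECREF) against
    # an already-destroyed interpreter; that is the failure mode
    # described in the removed __dealloc__ comment.
    pass


_register()
atexit.register(_unregister)
```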
@@ -836,7 +813,7 @@ cdef class Schema: metadata=self.metadata ) - def equals(self, other, bint check_metadata=True): + def equals(self, Schema other not None, bint check_metadata=True): """ Test if this schema is equal to the other @@ -850,8 +827,7 @@ cdef class Schema: ------- is_equal : boolean """ - cdef Schema _other = other - return self.sp_schema.get().Equals(deref(_other.schema), + return self.sp_schema.get().Equals(deref(other.schema), check_metadata) @classmethod @@ -1060,7 +1036,7 @@ cdef class Schema: CDictionaryMemo* arg_dict_memo if dictionary_memo is not None: - arg_dict_memo = &dictionary_memo.memo + arg_dict_memo = dictionary_memo.memo else: arg_dict_memo = &temp_memo @@ -1863,4 +1839,26 @@ def is_float_value(object obj): return IsPyFloat(obj) -_extension_types_initializer = _ExtensionTypesInitializer() +def _register_py_extension_type(): + cdef: + DataType storage_type + shared_ptr[CExtensionType] cpy_ext_type + + # Make a dummy C++ ExtensionType + storage_type = null() + check_status(CPyExtensionType.FromClass(storage_type.sp_type, + ExtensionType, &cpy_ext_type)) + check_status( + RegisterPyExtensionType( cpy_ext_type)) + + +def _unregister_py_extension_type(): + # This needs to be done explicitly before the Python interpreter is + # finalized. If the C++ type is destroyed later in the process + # teardown stage, it will invoke CPython APIs such as Py_DECREF + # with a destroyed interpreter. + check_status(UnregisterPyExtensionType()) + + +_register_py_extension_type() +atexit.register(_unregister_py_extension_type) diff --git a/python/requirements-test.txt b/python/requirements-test.txt index 2f0e9e7f466..73eabfebd28 100644 --- a/python/requirements-test.txt +++ b/python/requirements-test.txt @@ -1,5 +1,6 @@ +cython +hypothesis pandas +pathlib2; python_version < "3.4" pytest -hypothesis pytz -pathlib2; python_version < "3.4" diff --git a/python/setup.cfg b/python/setup.cfg index 9b8c7cec25c..452e4a41f6f 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -15,6 +15,11 @@ # specific language governing permissions and limitations # under the License. +[metadata] +license_files = + ../LICENSE.txt + ../NOTICE.txt + [build_sphinx] source-dir = doc/ build-dir = doc/_build diff --git a/python/setup.py b/python/setup.py index 855cba5a1ad..099298e8773 100755 --- a/python/setup.py +++ b/python/setup.py @@ -384,10 +384,6 @@ def _run_cmake(self): "{}_regex".format(self.boost_namespace), implib_required=False) if sys.platform == 'win32': - # zlib uses zlib.dll for Windows - zlib_lib_name = 'zlib' - move_shared_libs(build_prefix, build_lib, zlib_lib_name, - implib_required=False) if self.with_flight: # DLL dependencies for gRPC / Flight for lib_name in ['cares', 'libprotobuf', @@ -506,11 +502,16 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name): lib_filename = os.path.basename(libs[0]) shutil.move(pjoin(build_prefix, lib_filename), pjoin(build_lib, 'pyarrow', lib_filename)) + for lib in libs[1:]: + filename = os.path.basename(lib) + link_name = pjoin(build_lib, 'pyarrow', filename) + if not os.path.exists(link_name): + os.symlink(lib_filename, link_name) # In the event of not running from a git clone (e.g.
from a git archive # or a Python sdist), see if we can set the version number ourselves -default_version = '0.14.0' +default_version = '1.0.0-SNAPSHOT' if (not os.path.exists('../.git') and not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')): if os.path.exists('PKG-INFO'): diff --git a/r/DESCRIPTION b/r/DESCRIPTION index cfbb3231340..263a29f719c 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 0.14.0 +Version: 0.14.1.9000 Authors@R: c( person("Romain", "Fran\u00e7ois", email = "romain@rstudio.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2444-4226")), person("Jeroen", "Ooms", email = "jeroen@berkeley.edu", role = c("aut")), diff --git a/r/Dockerfile.conda b/r/Dockerfile.conda new file mode 100644 index 00000000000..2c664eb2886 --- /dev/null +++ b/r/Dockerfile.conda @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM arrow:cpp + +# install R specific packages +ARG R_VERSION=3.5.1 +COPY ci/conda_env_r.yml /arrow/ci/ +RUN conda install -q \ + --file arrow/ci/conda_env_r.yml \ + r-base=$R_VERSION \ + nomkl && \ + conda clean --all + +ENV ARROW_PYTHON=OFF \ + ARROW_BUILD_TESTS=OFF + +# build, install, test R package +CMD ["/bin/bash", "-c", "/arrow/ci/docker_build_cpp.sh && \ + /arrow/ci/docker_build_r.sh"] diff --git a/r/NEWS.md b/r/NEWS.md index 67625372c12..8eb3975bb4d 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 0.14.0 +# arrow 0.14.1.9000 + +# arrow 0.14.1 Initial CRAN release of the `arrow` package. Key features include: diff --git a/r/R/Table.R b/r/R/Table.R index 51320fde98d..15ea48fe7c1 100644 --- a/r/R/Table.R +++ b/r/R/Table.R @@ -31,7 +31,8 @@ #' @name arrow__Table `arrow::Table` <- R6Class("arrow::Table", inherit = `arrow::Object`, public = list( - column = function(i) shared_ptr(`arrow::Column`, Table__column(self, i)), + column = function(i) shared_ptr(`arrow::ChunkedArray`, Table__column(self, i)), + field = function(i) shared_ptr(`arrow::Field`, Table__field(self, i)), serialize = function(output_stream, ...) 
write_table(self, output_stream, ...), diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 951b83b9080..2031feaaaa5 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -204,22 +204,6 @@ ChunkArray__Slice2 <- function(chunked_array, offset, length){ .Call(`_arrow_ChunkArray__Slice2` , chunked_array, offset, length) } -Column__length <- function(column){ - .Call(`_arrow_Column__length` , column) -} - -Column__null_count <- function(column){ - .Call(`_arrow_Column__null_count` , column) -} - -Column__type <- function(column){ - .Call(`_arrow_Column__type` , column) -} - -Column__data <- function(column){ - .Call(`_arrow_Column__data` , column) -} - util___Codec__Create <- function(codec){ .Call(`_arrow_util___Codec__Create` , codec) } @@ -948,6 +932,10 @@ Table__column <- function(table, i){ .Call(`_arrow_Table__column` , table, i) } +Table__field <- function(table, i){ + .Call(`_arrow_Table__field` , table, i) +} + Table__columns <- function(table){ .Call(`_arrow_Table__columns` , table) } diff --git a/r/R/feather.R b/r/R/feather.R index 75ab6104237..57c1dffae42 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -34,7 +34,7 @@ num_rows = function() ipc___feather___TableReader__num_rows(self), num_columns = function() ipc___feather___TableReader__num_columns(self), GetColumnName = function(i) ipc___feather___TableReader__GetColumnName(self, i), - GetColumn = function(i) shared_ptr(`arrow::Column`, ipc___feather___TableReader__GetColumn(self, i)), + GetColumn = function(i) shared_ptr(`arrow::Array`, ipc___feather___TableReader__GetColumn(self, i)), Read = function(columns) { shared_ptr(`arrow::Table`, ipc___feather___TableReader__Read(self, columns)) } diff --git a/r/README.Rmd b/r/README.Rmd index 6b8381769dd..9a6c0b9728a 100644 --- a/r/README.Rmd +++ b/r/README.Rmd @@ -84,20 +84,19 @@ You can specify a particular commit, branch, or [release](https://github.com/apa ## Developing -If you need to alter both the Arrow C++ library and the R package code, or if you can't get a binary version of the latest C++ library elsewhere, you'll need to build it from source too. +If you need to alter both the Arrow C++ library and the R package code, or if +you can't get a binary version of the latest C++ library elsewhere, you'll need +to build it from source too. -First, clone the repository and install a release build of the C++ library. +First, install the C++ library. See the [C++ developer +guide](https://arrow.apache.org/docs/developers/cpp.html) for details. -```shell -git clone https://github.com/apache/arrow.git -mkdir arrow/cpp/build && cd arrow/cpp/build -cmake .. -DARROW_PARQUET=ON -DARROW_BOOST_USE_SHARED:BOOL=Off -DARROW_INSTALL_NAME_RPATH=OFF -make install -``` +Note that after any change to the C++ library, you must reinstall it and run +`make clean` or `git clean -fdx .` to remove any cached object code in the `r/` +directory. -This likely will require additional system libraries to be installed, the specifics of which are platform dependent. See the [C++ developer guide](https://arrow.apache.org/docs/developers/cpp.html) for details. 
- -Once you've built the C++ library, you can install the R package and its dependencies, along with additional dev dependencies, from the git checkout: +Once you've built the C++ library, you can install the R package and its +dependencies, along with additional dev dependencies, from the git checkout: ```shell cd ../../r @@ -114,15 +113,27 @@ unable to load shared object '/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so': dlopen(/Users/you/R/00LOCK-r/00new/arrow/libs/arrow.so, 6): Library not loaded: @rpath/libarrow.14.dylib ``` -try setting the environment variable `LD_LIBRARY_PATH` (or `DYLD_LIBRARY_PATH` on macOS) to wherever Arrow C++ was put in `make install`, e.g. `export LD_LIBRARY_PATH=/usr/local/lib`, and retry installing the R package. +try setting the environment variable `LD_LIBRARY_PATH` (or `DYLD_LIBRARY_PATH` +on macOS) to wherever Arrow C++ was put in `make install`, e.g. `export +LD_LIBRARY_PATH=/usr/local/lib`, and retry installing the R package. -For any other build/configuration challenges, see the [C++ developer guide](https://arrow.apache.org/docs/developers/cpp.html#building). +For any other build/configuration challenges, see the [C++ developer +guide](https://arrow.apache.org/docs/developers/cpp.html#building). ### Editing Rcpp code -The `arrow` package uses some customized tools on top of `Rcpp` to prepare its C++ code in `src/`. If you change C++ code in the R package, you will need to set the `ARROW_R_DEV` environment variable to `TRUE` (optionally, add it to your`~/.Renviron` file to persist across sessions) so that the `data-raw/codegen.R` file is used for code generation. +The `arrow` package uses some customized tools on top of `Rcpp` to prepare its +C++ code in `src/`. If you change C++ code in the R package, you will need to +set the `ARROW_R_DEV` environment variable to `TRUE` (optionally, add it to +your`~/.Renviron` file to persist across sessions) so that the +`data-raw/codegen.R` file is used for code generation. -You'll also need `remotes::install_github("romainfrancois/decor")`. +The codegen.R script has these dependencies: + +``` +remotes::install_github("romainfrancois/decor") +install.packages(c("dplyr", "purrr", "glue")) +``` ### Useful functions @@ -137,7 +148,9 @@ pkgdown::build_site(run_dont_run=TRUE) # To preview the documentation website devtools::check() # All package checks; see also below ``` -Any of those can be run from the command line by wrapping them in `R -e '$COMMAND'`. There's also a `Makefile` to help with some common tasks from the command line (`make test`, `make doc`, `make clean`, etc.) +Any of those can be run from the command line by wrapping them in `R -e +'$COMMAND'`. There's also a `Makefile` to help with some common tasks from the +command line (`make test`, `make doc`, `make clean`, etc.) ### Full package validation @@ -145,3 +158,5 @@ Any of those can be run from the command line by wrapping them in `R -e '$COMMAN R CMD build --keep-empty-dirs . R CMD check arrow_*.tar.gz --as-cran --no-manual ``` + +[1]: https://github.com/apache/arrow/blob/master/docs/source/developers/cpp.rst \ No newline at end of file diff --git a/r/README.md b/r/README.md index ddae0992a4d..43280f33c1b 100644 --- a/r/README.md +++ b/r/README.md @@ -48,6 +48,14 @@ library. 
``` r library(arrow) +#> +#> Attaching package: 'arrow' +#> The following object is masked from 'package:utils': +#> +#> timestamp +#> The following objects are masked from 'package:base': +#> +#> array, table set.seed(24) tab <- arrow::table(x = 1:10, y = rnorm(10)) @@ -125,20 +133,13 @@ If you need to alter both the Arrow C++ library and the R package code, or if you can’t get a binary version of the latest C++ library elsewhere, you’ll need to build it from source too. -First, clone the repository and install a release build of the C++ -library. - -``` shell -git clone https://github.com/apache/arrow.git -mkdir arrow/cpp/build && cd arrow/cpp/build -cmake .. -DARROW_PARQUET=ON -DARROW_BOOST_USE_SHARED:BOOL=Off -DARROW_INSTALL_NAME_RPATH=OFF -make install -``` - -This likely will require additional system libraries to be installed, -the specifics of which are platform dependent. See the [C++ developer +First, install the C++ library. See the [C++ developer guide](https://arrow.apache.org/docs/developers/cpp.html) for details. +Note that after any change to the C++ library, you must reinstall it and +run `make clean` or `git clean -fdx .` to remove any cached object code +in the `r/` directory. + Once you’ve built the C++ library, you can install the R package and its dependencies, along with additional dev dependencies, from the git checkout: @@ -173,7 +174,10 @@ you will need to set the `ARROW_R_DEV` environment variable to `TRUE` sessions) so that the `data-raw/codegen.R` file is used for code generation. -You’ll also need `remotes::install_github("romainfrancois/decor")`. +The codegen.R script has these dependencies: + + remotes::install_github("romainfrancois/decor") + install.packages(c("dplyr", "purrr", "glue")) ### Useful functions diff --git a/r/src/array__to_vector.cpp b/r/src/array__to_vector.cpp index 7fcb02bef3c..1458d012c22 100644 --- a/r/src/array__to_vector.cpp +++ b/r/src/array__to_vector.cpp @@ -816,8 +816,8 @@ Rcpp::List Table__to_dataframe(const std::shared_ptr& table, std::vector> converters(nc); for (int64_t i = 0; i < nc; i++) { - converters[i] = arrow::r::Converter::Make(table->column(i)->data()->chunks()); - names[i] = table->column(i)->name(); + converters[i] = arrow::r::Converter::Make(table->column(i)->chunks()); + names[i] = table->field(i)->name(); } if (use_threads) { diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 356f9ab2027..bcb0ac59b95 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -793,66 +793,6 @@ RcppExport SEXP _arrow_ChunkArray__Slice2(SEXP chunked_array_sexp, SEXP offset_s } #endif -// column.cpp -#if defined(ARROW_R_WITH_ARROW) -int Column__length(const std::shared_ptr& column); -RcppExport SEXP _arrow_Column__length(SEXP column_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter&>::type column(column_sexp); - return Rcpp::wrap(Column__length(column)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Column__length(SEXP column_sexp){ - Rf_error("Cannot call Column__length(). Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - -// column.cpp -#if defined(ARROW_R_WITH_ARROW) -int Column__null_count(const std::shared_ptr& column); -RcppExport SEXP _arrow_Column__null_count(SEXP column_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter&>::type column(column_sexp); - return Rcpp::wrap(Column__null_count(column)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Column__null_count(SEXP column_sexp){ - Rf_error("Cannot call Column__null_count(). 
Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - -// column.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Column__type(const std::shared_ptr& column); -RcppExport SEXP _arrow_Column__type(SEXP column_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter&>::type column(column_sexp); - return Rcpp::wrap(Column__type(column)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Column__type(SEXP column_sexp){ - Rf_error("Cannot call Column__type(). Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - -// column.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Column__data(const std::shared_ptr& column); -RcppExport SEXP _arrow_Column__data(SEXP column_sexp){ -BEGIN_RCPP - Rcpp::traits::input_parameter&>::type column(column_sexp); - return Rcpp::wrap(Column__data(column)); -END_RCPP -} -#else -RcppExport SEXP _arrow_Column__data(SEXP column_sexp){ - Rf_error("Cannot call Column__data(). Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - // compression.cpp #if defined(ARROW_R_WITH_ARROW) std::unique_ptr util___Codec__Create(arrow::Compression::type codec); @@ -1982,7 +1922,7 @@ RcppExport SEXP _arrow_ipc___feather___TableReader__GetColumnName(SEXP reader_se // feather.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ipc___feather___TableReader__GetColumn(const std::unique_ptr& reader, int i); +std::shared_ptr ipc___feather___TableReader__GetColumn(const std::unique_ptr& reader, int i); RcppExport SEXP _arrow_ipc___feather___TableReader__GetColumn(SEXP reader_sexp, SEXP i_sexp){ BEGIN_RCPP Rcpp::traits::input_parameter&>::type reader(reader_sexp); @@ -3641,7 +3581,7 @@ RcppExport SEXP _arrow_Table__schema(SEXP x_sexp){ // table.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Table__column(const std::shared_ptr& table, int i); +std::shared_ptr Table__column(const std::shared_ptr& table, int i); RcppExport SEXP _arrow_Table__column(SEXP table_sexp, SEXP i_sexp){ BEGIN_RCPP Rcpp::traits::input_parameter&>::type table(table_sexp); @@ -3657,7 +3597,23 @@ RcppExport SEXP _arrow_Table__column(SEXP table_sexp, SEXP i_sexp){ // table.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> Table__columns(const std::shared_ptr& table); +std::shared_ptr Table__field(const std::shared_ptr& table, int i); +RcppExport SEXP _arrow_Table__field(SEXP table_sexp, SEXP i_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type table(table_sexp); + Rcpp::traits::input_parameter::type i(i_sexp); + return Rcpp::wrap(Table__field(table, i)); +END_RCPP +} +#else +RcppExport SEXP _arrow_Table__field(SEXP table_sexp, SEXP i_sexp){ + Rf_error("Cannot call Table__field(). Please use arrow::install_arrow() to install required runtime libraries. 
"); +} +#endif + +// table.cpp +#if defined(ARROW_R_WITH_ARROW) +std::vector> Table__columns(const std::shared_ptr& table); RcppExport SEXP _arrow_Table__columns(SEXP table_sexp){ BEGIN_RCPP Rcpp::traits::input_parameter&>::type table(table_sexp); @@ -3811,10 +3767,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, { "_arrow_ChunkArray__Slice1", (DL_FUNC) &_arrow_ChunkArray__Slice1, 2}, { "_arrow_ChunkArray__Slice2", (DL_FUNC) &_arrow_ChunkArray__Slice2, 3}, - { "_arrow_Column__length", (DL_FUNC) &_arrow_Column__length, 1}, - { "_arrow_Column__null_count", (DL_FUNC) &_arrow_Column__null_count, 1}, - { "_arrow_Column__type", (DL_FUNC) &_arrow_Column__type, 1}, - { "_arrow_Column__data", (DL_FUNC) &_arrow_Column__data, 1}, { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 1}, { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, @@ -3997,6 +3949,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, { "_arrow_Table__column_names", (DL_FUNC) &_arrow_Table__column_names, 1}, { "_arrow_Table__select", (DL_FUNC) &_arrow_Table__select, 2}, diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 4e4091017c1..b3a25b79c84 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -74,14 +74,11 @@ std::shared_ptr Table__cast( const std::shared_ptr& options) { auto nc = table->num_columns(); - using ColumnVector = std::vector>; + using ColumnVector = std::vector>; ColumnVector columns(nc); for (int i = 0; i < nc; i++) { - columns[i] = std::make_shared( - table->column(i)->name(), - ChunkedArray__cast(table->column(i)->data(), schema->field(i)->type(), options)); + columns[i] = ChunkedArray__cast(table->column(i), schema->field(i)->type(), options); } - return arrow::Table::Make(schema, std::move(columns), table->num_rows()); } diff --git a/r/src/feather.cpp b/r/src/feather.cpp index a5198812647..7bdfeab72b2 100644 --- a/r/src/feather.cpp +++ b/r/src/feather.cpp @@ -106,9 +106,9 @@ std::string ipc___feather___TableReader__GetColumnName( } // [[arrow::export]] -std::shared_ptr ipc___feather___TableReader__GetColumn( +std::shared_ptr ipc___feather___TableReader__GetColumn( const std::unique_ptr& reader, int i) { - std::shared_ptr column; + std::shared_ptr column; STOP_IF_NOT_OK(reader->GetColumn(i, &column)); return column; } diff --git a/r/src/table.cpp b/r/src/table.cpp index 1e958d03eff..aebe3cb373c 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -47,16 +47,22 @@ std::shared_ptr Table__schema(const std::shared_ptr } // [[arrow::export]] -std::shared_ptr Table__column(const std::shared_ptr& table, - int i) { +std::shared_ptr Table__column( + const std::shared_ptr& table, int i) { return table->column(i); } // [[arrow::export]] -std::vector> Table__columns( +std::shared_ptr Table__field(const std::shared_ptr& table, + int i) { + return table->field(i); +} + +// [[arrow::export]] +std::vector> Table__columns( const std::shared_ptr& table) { auto nc = table->num_columns(); - std::vector> res(nc); + std::vector> res(nc); for (int i = 0; i < nc; 
i++) { res[i] = table->column(i); } @@ -68,7 +74,7 @@ Rcpp::CharacterVector Table__column_names(const std::shared_ptr& t int nc = table->num_columns(); Rcpp::CharacterVector res(nc); for (int i = 0; i < nc; i++) { - res[i] = table->column(i)->name(); + res[i] = table->field(i)->name(); } return res; } @@ -79,7 +85,7 @@ std::shared_ptr Table__select(const std::shared_ptr& R_xlen_t n = indices.size(); std::vector> fields(n); - std::vector> columns(n); + std::vector> columns(n); for (R_xlen_t i = 0; i < n; i++) { int pos = indices[i] - 1; @@ -120,7 +126,7 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { int num_fields; STOP_IF_NOT_OK(arrow::r::count_fields(lst, &num_fields)); - std::vector> columns(num_fields); + std::vector> columns(num_fields); std::shared_ptr schema; if (Rf_isNull(schema_sxp)) { @@ -129,21 +135,18 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { SEXP names = Rf_getAttrib(lst, R_NamesSymbol); auto fill_one_column = [&columns, &fields](int j, SEXP x, SEXP name) { - if (Rf_inherits(x, "arrow::Column")) { - columns[j] = arrow::r::extract(x); - fields[j] = columns[j]->field(); - } else if (Rf_inherits(x, "arrow::ChunkedArray")) { + if (Rf_inherits(x, "arrow::ChunkedArray")) { auto chunked_array = arrow::r::extract(x); - fields[j] = std::make_shared(CHAR(name), chunked_array->type()); - columns[j] = std::make_shared(fields[j], chunked_array); + fields[j] = arrow::field(CHAR(name), chunked_array->type()); + columns[j] = chunked_array; } else if (Rf_inherits(x, "arrow::Array")) { auto array = arrow::r::extract(x); - fields[j] = std::make_shared(CHAR(name), array->type()); - columns[j] = std::make_shared(fields[j], array); + fields[j] = arrow::field(CHAR(name), array->type()); + columns[j] = std::make_shared(array); } else { auto array = Array__from_vector(x, R_NilValue); - fields[j] = std::make_shared(CHAR(name), array->type()); - columns[j] = std::make_shared(fields[j], array); + fields[j] = arrow::field(CHAR(name), array->type()); + columns[j] = std::make_shared(array); } }; @@ -168,18 +171,16 @@ std::shared_ptr Table__from_dots(SEXP lst, SEXP schema_sxp) { schema = arrow::r::extract(schema_sxp); auto fill_one_column = [&columns, &schema](int j, SEXP x) { - if (Rf_inherits(x, "arrow::Column")) { - columns[j] = arrow::r::extract(x); - } else if (Rf_inherits(x, "arrow::ChunkedArray")) { + if (Rf_inherits(x, "arrow::ChunkedArray")) { auto chunked_array = arrow::r::extract(x); - columns[j] = std::make_shared(schema->field(j), chunked_array); + columns[j] = chunked_array; } else if (Rf_inherits(x, "arrow::Array")) { auto array = arrow::r::extract(x); - columns[j] = std::make_shared(schema->field(j), array); + columns[j] = std::make_shared(array); } else { auto type = schema->field(j)->type(); auto array = arrow::r::Array__from_vector(x, type, false); - columns[j] = std::make_shared(schema->field(j), array); + columns[j] = std::make_shared(array); } }; diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index 94139048192..b3e7d5638f5 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -113,7 +113,7 @@ test_that("Can read json file with nested columns (ARROW-5503)", { ) ) - struct_array <- tab1$column(1)$data()$chunk(0) + struct_array <- tab1$column(1)$chunk(0) ps <- array(c(NA, NA, 78, 90, NA, 19)) hello <- array(c(NA, NA, "hi", "bonjour", "ciao", NA)) expect_equal(struct_array$field(0L), ps) @@ -132,7 +132,7 @@ test_that("Can read json file with nested columns (ARROW-5503)", { 5, c(5, 6) ) - list_array 
<- tab1$column(0)$data() + list_array <- tab1$column(0) expect_identical( list_array$as_vector(), list_array_r diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index fb442a293cf..17d994deab2 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -28,14 +28,8 @@ test_that("arrow::table round trip", { expect_equal(tab$num_columns, 3L) expect_equal(tab$num_rows, 10L) - # arrow::Column - col_int <- tab$column(0) - expect_equal(col_int$length(), 10L) - expect_equal(col_int$null_count, 0L) - expect_equal(col_int$type, int32()) - # arrow::ChunkedArray - chunked_array_int <- col_int$data() + chunked_array_int <- tab$column(0) expect_equal(chunked_array_int$length(), 10L) expect_equal(chunked_array_int$null_count, 0L) expect_equal(chunked_array_int$as_vector(), tbl$int) @@ -47,14 +41,8 @@ test_that("arrow::table round trip", { expect_equal(chunked_array_int$chunk(i-1L), chunks_int[[i]]) } - # arrow::Column - col_dbl <- tab$column(1) - expect_equal(col_dbl$length(), 10L) - expect_equal(col_dbl$null_count, 0L) - expect_equal(col_dbl$type, float64()) - # arrow::ChunkedArray - chunked_array_dbl <- col_dbl$data() + chunked_array_dbl <- tab$column(1) expect_equal(chunked_array_dbl$length(), 10L) expect_equal(chunked_array_dbl$null_count, 0L) expect_equal(chunked_array_dbl$as_vector(), tbl$dbl) @@ -66,14 +54,8 @@ test_that("arrow::table round trip", { expect_equal(chunked_array_dbl$chunk(i-1L), chunks_dbl[[i]]) } - # arrow::Colmumn - col_raw <- tab$column(2) - expect_equal(col_raw$length(), 10L) - expect_equal(col_raw$null_count, 0L) - expect_equal(col_raw$type, int8()) - # arrow::ChunkedArray - chunked_array_raw <- col_raw$data() + chunked_array_raw <- tab$column(2) expect_equal(chunked_array_raw$length(), 10L) expect_equal(chunked_array_raw$null_count, 0L) expect_equal(chunked_array_raw$as_vector(), as.integer(tbl$raw)) @@ -130,4 +112,3 @@ test_that("arrow::table round trip handles NA in integer and numeric", { expect_true(is.na(res$dbl[10])) unlink(tf) }) - diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb index 730fc98b11e..47b8f4f2cf7 100644 --- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb +++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb @@ -16,7 +16,7 @@ # under the License. 
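The Ruby version bump that follows is consumed by `VERSION.split("-")`, so a pre-release tag such as `SNAPSHOT` rides along after the dash; the same parse, restated as a quick Python sketch for illustration:

```python
version = "1.0.0-SNAPSHOT"
numbers, _, tag = version.partition("-")
major, minor, micro = (int(v) for v in numbers.split("."))
assert (major, minor, micro) == (1, 0, 0)
assert tag == "SNAPSHOT"  # empty string for plain releases like "0.14.0"
```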
module ArrowCUDA - VERSION = "0.14.0" + VERSION = "1.0.0-SNAPSHOT" module Version numbers, TAG = VERSION.split("-") diff --git a/ruby/red-arrow/ext/arrow/raw-records.cpp b/ruby/red-arrow/ext/arrow/raw-records.cpp index a2fab554536..9b94ec04d53 100644 --- a/ruby/red-arrow/ext/arrow/raw-records.cpp +++ b/ruby/red-arrow/ext/arrow/raw-records.cpp @@ -661,8 +661,7 @@ namespace red_arrow { rb_ary_push(records_, record); } for (int i = 0; i < n_columns_; ++i) { - const auto column = table.column(i).get(); - const auto chunked_array = column->data(); + const auto& chunked_array = table.column(i).get(); column_index_ = i; row_offset_ = 0; for (const auto array : chunked_array->chunks()) { diff --git a/ruby/red-arrow/lib/arrow/chunked-array.rb b/ruby/red-arrow/lib/arrow/chunked-array.rb index c720d229c39..1f55cb82522 100644 --- a/ruby/red-arrow/lib/arrow/chunked-array.rb +++ b/ruby/red-arrow/lib/arrow/chunked-array.rb @@ -19,6 +19,11 @@ module Arrow class ChunkedArray include Enumerable + alias_method :size, :n_rows + unless method_defined?(:length) + alias_method :length, :n_rows + end + alias_method :chunks_raw, :chunks def chunks @chunks ||= chunks_raw diff --git a/ruby/red-arrow/lib/arrow/column-containable.rb b/ruby/red-arrow/lib/arrow/column-containable.rb new file mode 100644 index 00000000000..51ad88e7080 --- /dev/null +++ b/ruby/red-arrow/lib/arrow/column-containable.rb @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
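The new `ColumnContainable` module that follows resolves columns by name or by a possibly negative integer index; its index normalization, restated as a small Python sketch (the function name is illustrative):

```python
def resolve_index(index, n_columns):
    # Negative indices count from the end, as in Ruby and Python lists;
    # anything still out of range yields None instead of raising.
    if index < 0:
        index += n_columns
    if 0 <= index < n_columns:
        return index
    return None


assert resolve_index(-1, 3) == 2    # last column
assert resolve_index(5, 3) is None  # out of range
```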
+ +module Arrow + module ColumnContainable + def columns + @columns ||= schema.n_fields.times.collect do |i| + Column.new(self, i) + end + end + + def each_column(&block) + columns.each(&block) + end + + def find_column(name_or_index) + case name_or_index + when String, Symbol + name = name_or_index.to_s + index = schema.get_field_index(name) + return nil if index == -1 + Column.new(self, index) + when Integer + index = name_or_index + index += n_columns if index < 0 + return nil if index < 0 or index >= n_columns + Column.new(self, index) + else + message = "column name or index must be String, Symbol or Integer" + raise ArgumentError, message + end + end + end +end diff --git a/ruby/red-arrow/lib/arrow/column.rb b/ruby/red-arrow/lib/arrow/column.rb index de385c04622..06f3dbdc05b 100644 --- a/ruby/red-arrow/lib/arrow/column.rb +++ b/ruby/red-arrow/lib/arrow/column.rb @@ -19,32 +19,58 @@ module Arrow class Column include Enumerable + attr_reader :container + attr_reader :field + attr_reader :data + def initialize(container, index) + @container = container + @index = index + @field = @container.schema[@index] + @data = @container.get_column_data(@index) + end + + def name + @field.name + end + + def data_type + @field.data_type + end + def null?(i) - data.null?(i) + @data.null?(i) end def valid?(i) - data.valid?(i) + @data.valid?(i) end def [](i) - data[i] + @data[i] end def each(&block) - return to_enum(__method__) unless block_given? - - data.each(&block) + @data.each(&block) end def reverse_each(&block) - return to_enum(__method__) unless block_given? + @data.reverse_each(&block) + end + + def n_rows + @data.n_rows + end + alias_method :size, :n_rows + alias_method :length, :n_rows - data.reverse_each(&block) + def n_nulls + @data.n_nulls end - def pack - self.class.new(field, data.pack) + def ==(other) + other.is_a?(self.class) and + @field == other.field and + @data == other.data end end end diff --git a/ruby/red-arrow/lib/arrow/data-type.rb b/ruby/red-arrow/lib/arrow/data-type.rb index 5b1c873029a..9411785b437 100644 --- a/ruby/red-arrow/lib/arrow/data-type.rb +++ b/ruby/red-arrow/lib/arrow/data-type.rb @@ -29,24 +29,33 @@ class << self # # @return [Arrow::DataType] The given data type itself. # - # @overload resolve(name, *arguments) + # @overload resolve(name) # # Creates a suitable data type from type name. For example, # you can create {Arrow::BooleanDataType} from `:boolean`. # # @param name [String, Symbol] The type name of the data type. # - # @param arguments [::Array] The additional information of the - # data type. + # @example Create a boolean data type + # Arrow::DataType.resolve(:boolean) + # + # @overload resolve(name_with_arguments) + # + # Creates a suitable data type from type name with arguments. + # + # @param name_with_arguments [::Array] + # The type name of the data type as the first element. + # + # The rest elements are additional information of the data type. # # For example, {Arrow::TimestampDataType} needs unit as # additional information. 
diff --git a/ruby/red-arrow/lib/arrow/data-type.rb b/ruby/red-arrow/lib/arrow/data-type.rb
index 5b1c873029a..9411785b437 100644
--- a/ruby/red-arrow/lib/arrow/data-type.rb
+++ b/ruby/red-arrow/lib/arrow/data-type.rb
@@ -29,24 +29,33 @@ class << self
     #
     #   @return [Arrow::DataType] The given data type itself.
     #
-    # @overload resolve(name, *arguments)
+    # @overload resolve(name)
     #
     #   Creates a suitable data type from type name. For example,
     #   you can create {Arrow::BooleanDataType} from `:boolean`.
     #
     #   @param name [String, Symbol] The type name of the data type.
     #
-    #   @param arguments [::Array] The additional information of the
-    #     data type.
+    #   @example Create a boolean data type
+    #     Arrow::DataType.resolve(:boolean)
+    #
+    # @overload resolve(name_with_arguments)
+    #
+    #   Creates a suitable data type from type name with arguments.
+    #
+    #   @param name_with_arguments [::Array]
+    #     The type name of the data type as the first element.
+    #
+    #     The remaining elements are additional information for the data type.
     #
     #     For example, {Arrow::TimestampDataType} needs unit as
     #     additional information.
     #
     #   @example Create a boolean data type
-    #     Arrow::DataType.resolve(:boolean)
+    #     Arrow::DataType.resolve([:boolean])
     #
     #   @example Create a milliseconds unit timestamp data type
-    #     Arrow::DataType.resolve(:timestamp, :milli)
+    #     Arrow::DataType.resolve([:timestamp, :milli])
     #
     # @overload resolve(description)
     #
@@ -135,5 +144,13 @@ def resolve_class(data_type)
         Arrow.const_get(data_type_class_name)
       end
     end
+
+    def build_array(values)
+      base_name = self.class.name.gsub(/DataType\z/, "")
+      builder_class = self.class.const_get("#{base_name}ArrayBuilder")
+      args = [values]
+      args.unshift(self) unless builder_class.buildable?(args)
+      builder_class.build(*args)
+    end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/field-containable.rb b/ruby/red-arrow/lib/arrow/field-containable.rb
index 1956dde12c9..e4dbf4ec26c 100644
--- a/ruby/red-arrow/lib/arrow/field-containable.rb
+++ b/ruby/red-arrow/lib/arrow/field-containable.rb
@@ -24,6 +24,8 @@ def find_field(name_or_index)
       get_field_by_name(name)
     when Integer
       index = name_or_index
+      index += n_fields if index < 0
+      return nil if index < 0 or index >= n_fields
       get_field(index)
     else
       message = "field name or index must be String, Symbol or Integer"
diff --git a/ruby/red-arrow/lib/arrow/group.rb b/ruby/red-arrow/lib/arrow/group.rb
index 7ef8dc3d74c..568e0e8c3fe 100644
--- a/ruby/red-arrow/lib/arrow/group.rb
+++ b/ruby/red-arrow/lib/arrow/group.rb
@@ -152,24 +152,21 @@ def aggregate(target_columns)
       end
       grouped_key_arrays_raw = grouped_keys.transpose
-      columns = @keys.collect.with_index do |key, i|
+      fields = []
+      arrays = []
+      @keys.each_with_index do |key, i|
         key_column = @table[key]
-        key_column_array_class = key_column.data.chunks.first.class
-        if key_column_array_class == TimestampArray
-          builder = TimestampArrayBuilder.new(key_column.data_type)
-          key_column_array = builder.build(grouped_key_arrays_raw[i])
-        else
-          key_column_array =
-            key_column_array_class.new(grouped_key_arrays_raw[i])
-        end
-        Column.new(key_column.field, key_column_array)
+        key_column_array_raw = grouped_key_arrays_raw[i]
+        key_column_array = key_column.data_type.build_array(key_column_array_raw)
+        fields << key_column.field
+        arrays << key_column_array
       end
       target_columns.each_with_index do |column, i|
         array = ArrayBuilder.build(aggregated_arrays_raw[i])
-        field = Field.new(column.name, array.value_data_type)
-        columns << Column.new(field, array)
+        arrays << array
+        fields << Field.new(column.field.name, array.value_data_type)
       end
-      Table.new(columns)
+      Table.new(fields, arrays)
     end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb
index b577d4a41a6..60fd42ec4ac 100644
--- a/ruby/red-arrow/lib/arrow/record-batch.rb
+++ b/ruby/red-arrow/lib/arrow/record-batch.rb
@@ -15,10 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
+require "arrow/column-containable"
 require "arrow/record-containable"
 
 module Arrow
   class RecordBatch
+    include ColumnContainable
     include RecordContainable
     include Enumerable
 
@@ -40,10 +42,10 @@ def new(*args)
 
     alias_method :each, :each_record
 
-    alias_method :columns_raw, :columns
-    def columns
-      @columns ||= columns_raw
-    end
+    alias_method :size, :n_rows
+    alias_method :length, :n_rows
+
+    alias_method :[], :find_column
 
     # Converts the record batch to {Arrow::Table}.
     #
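The new `DataType#build_array` derives the builder class from the data type's class name (`TimestampDataType` resolves to `TimestampArrayBuilder`), which is what the `Group#aggregate` rewrite above relies on. A hedged sketch of the resulting API, with illustrative values:

```ruby
# Builders that need the data type itself (e.g. timestamp) receive it
# automatically via buildable?/unshift in build_array above.
data_type = Arrow::TimestampDataType.new(:milli)
timestamps = data_type.build_array([Time.at(0), nil, Time.now])

# Simple types resolve to their plain builders.
booleans = Arrow::BooleanDataType.new.build_array([true, nil, false])
```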
diff --git a/ruby/red-arrow/lib/arrow/record-containable.rb b/ruby/red-arrow/lib/arrow/record-containable.rb
index f73d1a8c126..20c9ac2f576 100644
--- a/ruby/red-arrow/lib/arrow/record-containable.rb
+++ b/ruby/red-arrow/lib/arrow/record-containable.rb
@@ -17,12 +17,6 @@
 
 module Arrow
   module RecordContainable
-    def each_column(&block)
-      return to_enum(__method__) unless block_given?
-
-      columns.each(&block)
-    end
-
     def each_record(reuse_record: false)
       unless block_given?
         return to_enum(__method__, reuse_record: reuse_record)
@@ -40,34 +34,5 @@ def each_record(reuse_record: false)
         end
       end
     end
-
-    def find_column(name_or_index)
-      case name_or_index
-      when String, Symbol
-        name = name_or_index.to_s
-        index = resolve_column_name(name)
-        return nil if index.nil?
-        columns[index]
-      when Integer
-        index = name_or_index
-        columns[index]
-      else
-        message = "column name or index must be String, Symbol or Integer"
-        raise ArgumentError, message
-      end
-    end
-
-    private
-    def resolve_column_name(name)
-      (@column_name_to_index ||= build_column_name_resolve_table)[name]
-    end
-
-    def build_column_name_resolve_table
-      table = {}
-      schema.fields.each_with_index do |field, i|
-        table[field.name] = i
-      end
-      table
-    end
   end
 end
diff --git a/ruby/red-arrow/lib/arrow/record.rb b/ruby/red-arrow/lib/arrow/record.rb
index 70bd215033f..6f83dded0c3 100644
--- a/ruby/red-arrow/lib/arrow/record.rb
+++ b/ruby/red-arrow/lib/arrow/record.rb
@@ -17,38 +17,41 @@
 
 module Arrow
   class Record
+    attr_reader :container
     attr_accessor :index
-    def initialize(record_container, index)
-      @record_container = record_container
+    def initialize(container, index)
+      @container = container
       @index = index
     end
 
     def [](column_name_or_column_index)
-      column = @record_container.find_column(column_name_or_column_index)
+      column = @container.find_column(column_name_or_column_index)
       return nil if column.nil?
       column[@index]
     end
 
-    def columns
-      @record_container.columns
+    def to_a
+      @container.columns.collect do |column|
+        column[@index]
+      end
     end
 
     def to_h
       attributes = {}
-      @record_container.schema.fields.each_with_index do |field, i|
-        attributes[field.name] = self[i]
+      @container.columns.each do |column|
+        attributes[column.name] = column[@index]
       end
       attributes
     end
 
     def respond_to_missing?(name, include_private)
-      return true if @record_container.find_column(name)
+      return true if @container.find_column(name)
       super
     end
 
     def method_missing(name, *args, &block)
       if args.empty?
-        column = @record_container.find_column(name)
+        column = @container.find_column(name)
         return column[@index] if column
       end
       super
diff --git a/ruby/red-arrow/lib/arrow/slicer.rb b/ruby/red-arrow/lib/arrow/slicer.rb
index fd2033d37cb..fa834766866 100644
--- a/ruby/red-arrow/lib/arrow/slicer.rb
+++ b/ruby/red-arrow/lib/arrow/slicer.rb
@@ -253,9 +253,9 @@ def evaluate
       case @value
       when nil
         if @column.n_nulls.zero?
-          raw_array = [true] * @column.length
+          raw_array = [true] * @column.n_rows
         else
-          raw_array = @column.length.times.collect do |i|
+          raw_array = @column.n_rows.times.collect do |i|
             @column.valid?(i)
           end
         end
diff --git a/ruby/red-arrow/lib/arrow/table-loader.rb b/ruby/red-arrow/lib/arrow/table-loader.rb
index 15bd9ee3a32..204b4f87754 100644
--- a/ruby/red-arrow/lib/arrow/table-loader.rb
+++ b/ruby/red-arrow/lib/arrow/table-loader.rb
@@ -88,17 +88,11 @@ def open_input_stream
 
     def load_raw(input, reader)
       schema = reader.schema
-      chunked_arrays = []
+      record_batches = []
       reader.each do |record_batch|
-        record_batch.columns.each_with_index do |array, i|
-          chunked_array = (chunked_arrays[i] ||= [])
-          chunked_array << array
-        end
-      end
-      columns = schema.fields.collect.with_index do |field, i|
-        Column.new(field, ChunkedArray.new(chunked_arrays[i]))
+        record_batches << record_batch
       end
-      table = Table.new(schema, columns)
+      table = Table.new(schema, record_batches)
       table.instance_variable_set(:@input, input)
       table
     end
diff --git a/ruby/red-arrow/lib/arrow/table.rb b/ruby/red-arrow/lib/arrow/table.rb
index 64f4b49fc51..c0ce502beca 100644
--- a/ruby/red-arrow/lib/arrow/table.rb
+++ b/ruby/red-arrow/lib/arrow/table.rb
@@ -15,11 +15,13 @@
 # specific language governing permissions and limitations
 # under the License.
 
+require "arrow/column-containable"
 require "arrow/group"
 require "arrow/record-containable"
 
 module Arrow
   class Table
+    include ColumnContainable
     include RecordContainable
 
     class << self
@@ -74,6 +76,16 @@ def load(path, options={})
     #     Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks),
     #                      "visible" => Arrow::ChunkedArray.new(visible_chunks))
     #
+    #   @overload initialize(raw_table)
+    #
+    #     @param raw_table [Hash]
+    #       The pairs of column name and values of the table. Column values are
+    #       `Array`s.
+    #
+    #     @example Create a table from column name and values
+    #       Arrow::Table.new("count" => [0, 2, nil, 4],
+    #                        "visible" => [true, nil, nil, false])
+    #
     #   @overload initialize(schema, columns)
     #
     #     @param schema [Arrow::Schema] The schema of the table.
@@ -152,17 +172,18 @@ def initialize(*args)
       case n_args
       when 1
         if args[0][0].is_a?(Column)
-          values = args[0]
-          fields = values.collect(&:field)
+          columns = args[0]
+          fields = columns.collect(&:field)
+          values = columns.collect(&:data)
           schema = Schema.new(fields)
         else
           raw_table = args[0]
           fields = []
           values = []
           raw_table.each do |name, array|
-            field = Field.new(name.to_s, array.value_data_type)
-            fields << field
-            values << Column.new(field, array)
+            array = ArrayBuilder.build(array) if array.is_a?(::Array)
+            fields << Field.new(name.to_s, array.value_data_type)
+            values << array
           end
           schema = Schema.new(fields)
         end
@@ -170,20 +191,19 @@ def initialize(*args)
         schema = args[0]
         schema = Schema.new(schema) unless schema.is_a?(Schema)
         values = args[1]
-        if values[0].is_a?(::Array)
+        case values[0]
+        when ::Array
           values = [RecordBatch.new(schema, values)]
+        when Column
+          values = values.collect(&:data)
         end
       else
-        message = "wrong number of arguments (given, #{n_args}, expected 1..2)"
+        message = "wrong number of arguments (given #{n_args}, expected 1..2)"
         raise ArgumentError, message
       end
       initialize_raw(schema, values)
     end
 
-    def columns
-      @columns ||= n_columns.times.collect {|i| get_column(i)}
-    end
-
     def each_record_batch
       return to_enum(__method__) unless block_given?
 
@@ -338,7 +358,7 @@ def merge(other)
         other.each do |name, value|
           name = name.to_s
           if value
-            added_columns[name] = ensure_column(name, value)
+            added_columns[name] = ensure_raw_column(name, value)
           else
             removed_columns[name] = true
           end
@@ -346,7 +366,8 @@ def merge(other)
       when Table
         added_columns = {}
         other.columns.each do |column|
-          added_columns[column.name] = column
+          name = column.name
+          added_columns[name] = ensure_raw_column(name, column)
         end
       else
         message = "merge target must be Hash or Arrow::Table: " +
@@ -363,15 +384,18 @@ def merge(other)
           next
         end
         next if removed_columns.key?(column_name)
-        new_columns << column
+        new_columns << ensure_raw_column(column_name, column)
       end
       added_columns.each do |name, new_column|
         new_columns << new_column
       end
-      new_fields = new_columns.collect do |new_column|
-        new_column.field
+      new_fields = []
+      new_arrays = []
+      new_columns.each do |new_column|
+        new_fields << new_column[:field]
+        new_arrays << new_column[:data]
       end
-      self.class.new(Schema.new(new_fields), new_columns)
+      self.class.new(new_fields, new_arrays)
     end
 
     alias_method :remove_column_raw, :remove_column
@@ -447,10 +471,10 @@ def save(path, options={})
     end
 
     def pack
-      packed_columns = columns.collect do |column|
-        column.pack
+      packed_arrays = columns.collect do |column|
+        column.data.pack
       end
-      self.class.new(schema, packed_columns)
+      self.class.new(schema, packed_arrays)
     end
 
     alias_method :to_s_raw, :to_s
@@ -524,13 +548,26 @@ def slice_by_ranges(ranges)
       end
     end
 
-    def ensure_column(name, data)
+    def ensure_raw_column(name, data)
       case data
       when Array
-        field = Field.new(name, data.value_data_type)
-        Column.new(field, data)
+        {
+          field: Field.new(name, data.value_data_type),
+          data: ChunkedArray.new([data]),
+        }
+      when ChunkedArray
+        {
+          field: Field.new(name, data.value_data_type),
+          data: data,
+        }
       when Column
-        data
+        column = data
+        data = column.data
+        data = ChunkedArray.new([data]) unless data.is_a?(ChunkedArray)
+        {
+          field: column.field,
+          data: data,
+        }
       else
         message = "column must be Arrow::Array or Arrow::Column: " +
                   "<#{name}>: <#{data.inspect}>: #{inspect}"
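With Column decoupled from storage, `Table.new` accepts several shapes of input. A brief sketch of the main forms, with illustrative values (see the documentation hunk above):

```ruby
# Column name => raw values; arrays are built automatically via ArrayBuilder.
Arrow::Table.new("count" => [0, 2, nil, 4],
                 "visible" => [true, nil, nil, false])

# Schema plus arrays, chunked arrays, record batches, or legacy columns.
fields = [
  Arrow::Field.new("count", :uint32),
  Arrow::Field.new("visible", :boolean),
]
schema = Arrow::Schema.new(fields)
Arrow::Table.new(schema,
                 [Arrow::UInt32Array.new([0, 2, nil, 4]),
                  Arrow::BooleanArray.new([true, nil, nil, false])])
```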
diff --git a/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb b/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
index 4b22682efca..17efaa0c446 100644
--- a/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/timestamp-array-builder.rb
@@ -17,6 +17,25 @@
 
 module Arrow
   class TimestampArrayBuilder
+    class << self
+      def build(unit_or_data_type, values)
+        builder = new(unit_or_data_type)
+        builder.build(values)
+      end
+    end
+
+    alias_method :initialize_raw, :initialize
+    def initialize(unit_or_data_type)
+      case unit_or_data_type
+      when DataType
+        data_type = unit_or_data_type
+      else
+        unit = unit_or_data_type
+        data_type = TimestampDataType.new(unit)
+      end
+      initialize_raw(data_type)
+    end
+
     private
     def unit_id
       @unit_id ||= value_data_type.unit.nick.to_sym
diff --git a/ruby/red-arrow/lib/arrow/timestamp-array.rb b/ruby/red-arrow/lib/arrow/timestamp-array.rb
index 6cffb8c261c..3262c23a918 100644
--- a/ruby/red-arrow/lib/arrow/timestamp-array.rb
+++ b/ruby/red-arrow/lib/arrow/timestamp-array.rb
@@ -17,14 +17,6 @@
 
 module Arrow
   class TimestampArray
-    class << self
-      def new(unit, values)
-        data_type = TimestampDataType.new(unit)
-        builder = TimestampArrayBuilder.new(data_type)
-        builder.build(values)
-      end
-    end
-
     def get_value(i)
       to_time(get_raw_value(i))
     end
diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb
index f6b06a49fb0..7b50309fdd8 100644
--- a/ruby/red-arrow/lib/arrow/version.rb
+++ b/ruby/red-arrow/lib/arrow/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Arrow
-  VERSION = "0.14.0"
+  VERSION = "1.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow/test/raw-records/test-dense-union-array.rb b/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
index c79c093d550..5e267660eb1 100644
--- a/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
@@ -52,7 +52,7 @@ def build_record_batch(type, records)
       end
       sub_record_batch = Arrow::RecordBatch.new(sub_schema, sub_records)
-      sub_record_batch.columns[0]
+      sub_record_batch.columns[0].data
     end
     records.each do |record|
       column = record[0]
diff --git a/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb b/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
index f80592ff41a..c761cc64743 100644
--- a/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
@@ -47,7 +47,7 @@ def build_record_batch(type, records)
       end
       sub_record_batch = Arrow::RecordBatch.new(sub_schema, sub_records)
-      sub_record_batch.columns[0]
+      sub_record_batch.columns[0].data
     end
     records.each do |record|
       column = record[0]
diff --git a/ruby/red-arrow/test/test-column.rb b/ruby/red-arrow/test/test-column.rb
index 81bf0e09a5b..613b01ccc7b 100644
--- a/ruby/red-arrow/test/test-column.rb
+++ b/ruby/red-arrow/test/test-column.rb
@@ -16,51 +16,76 @@
 # under the License.
 
 class ColumnTest < Test::Unit::TestCase
+  def setup
+    table = Arrow::Table.new("visible" => [true, nil, false])
+    @column = table.visible
+  end
+
+  test("#name") do
+    assert_equal("visible", @column.name)
+  end
+
+  test("#data_type") do
+    assert_equal(Arrow::BooleanDataType.new, @column.data_type)
+  end
+
+  test("#null?") do
+    assert do
+      @column.null?(1)
+    end
+  end
+
+  test("#valid?") do
+    assert do
+      @column.valid?(0)
+    end
+  end
+
   test("#each") do
-    arrays = [
-      Arrow::BooleanArray.new([true, false]),
-      Arrow::BooleanArray.new([nil, true]),
-    ]
-    chunked_array = Arrow::ChunkedArray.new(arrays)
-    column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
-                               chunked_array)
-    assert_equal([true, false, nil, true],
-                 column.to_a)
+    assert_equal([true, nil, false], @column.each.to_a)
+  end
+
+  test("#reverse_each") do
+    assert_equal([false, nil, true], @column.reverse_each.to_a)
   end
 
-  test("#pack") do
-    arrays = [
-      Arrow::BooleanArray.new([true, false]),
-      Arrow::BooleanArray.new([nil, true]),
-    ]
-    chunked_array = Arrow::ChunkedArray.new(arrays)
-    column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
-                               chunked_array)
-    packed_column = column.pack
-    assert_equal([1, [true, false, nil, true]],
-                 [packed_column.data.n_chunks, packed_column.to_a])
+  test("#n_rows") do
+    assert_equal(3, @column.n_rows)
+  end
+
+  test("#n_nulls") do
+    assert_equal(1, @column.n_nulls)
   end
 
   sub_test_case("#==") do
-    def setup
-      arrays = [
-        Arrow::BooleanArray.new([true]),
-        Arrow::BooleanArray.new([false, true]),
-      ]
-      chunked_array = Arrow::ChunkedArray.new(arrays)
-      @column = Arrow::Column.new(Arrow::Field.new("visible", :boolean),
-                                  chunked_array)
+    test("same value") do
+      table1 = Arrow::Table.new("visible" => [true, false])
+      table2 = Arrow::Table.new("visible" => [true, false])
+      assert do
+        table1.visible == table2.visible
+      end
+    end
+
+    test("different name") do
+      table1 = Arrow::Table.new("visible" => [true, false])
+      table2 = Arrow::Table.new("invisible" => [true, false])
+      assert do
+        not table1.visible == table2.invisible
+      end
     end
 
-    test("Arrow::Column") do
+    test("different value") do
+      table1 = Arrow::Table.new("visible" => [true, false])
+      table2 = Arrow::Table.new("visible" => [true, true])
       assert do
-        @column == @column
+        not table1.visible == table2.visible
       end
     end
 
     test("not Arrow::Column") do
+      table = Arrow::Table.new("visible" => [true, false])
       assert do
-        not (@column == 29)
+        not table.visible == 29
       end
     end
   end
diff --git a/ruby/red-arrow/test/test-slicer.rb b/ruby/red-arrow/test/test-slicer.rb
index ba035b604a0..a661888d3ec 100644
--- a/ruby/red-arrow/test/test-slicer.rb
+++ b/ruby/red-arrow/test/test-slicer.rb
@@ -36,9 +36,7 @@ def setup
     ]
     @count_array = Arrow::ChunkedArray.new(count_arrays)
     @visible_array = Arrow::ChunkedArray.new(visible_arrays)
-    @count_column = Arrow::Column.new(@count_field, @count_array)
-    @visible_column = Arrow::Column.new(@visible_field, @visible_array)
-    @table = Arrow::Table.new(schema, [@count_column, @visible_column])
+    @table = Arrow::Table.new(schema, [@count_array, @visible_array])
   end
 
   sub_test_case("column") do
diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb
index dce5d25758c..e3b43cd31f3 100644
--- a/ruby/red-arrow/test/test-table.rb
+++ b/ruby/red-arrow/test/test-table.rb
@@ -37,14 +37,15 @@ def setup
     ]
     @count_array = Arrow::ChunkedArray.new(count_arrays)
     @visible_array = Arrow::ChunkedArray.new(visible_arrays)
-    @count_column = Arrow::Column.new(@count_field, @count_array)
-    @visible_column = Arrow::Column.new(@visible_field, @visible_array)
-    @table = Arrow::Table.new(schema, [@count_column, @visible_column])
+    @table = Arrow::Table.new(schema, [@count_array, @visible_array])
   end
 
   test("#columns") do
-    assert_equal(["count", "visible"],
-                 @table.columns.collect(&:name))
+    assert_equal([
+                   Arrow::Column.new(@table, 0),
+                   Arrow::Column.new(@table, 1),
+                 ],
+                 @table.columns)
   end
 
   sub_test_case("#slice") do
@@ -188,11 +189,18 @@ def setup
   sub_test_case("#[]") do
     test("[String]") do
-      assert_equal(@count_column, @table["count"])
+      assert_equal(Arrow::Column.new(@table, 0),
+                   @table["count"])
     end
 
     test("[Symbol]") do
-      assert_equal(@visible_column, @table[:visible])
+      assert_equal(Arrow::Column.new(@table, 1),
+                   @table[:visible])
+    end
+
+    test("[Integer]") do
+      assert_equal(Arrow::Column.new(@table, 1),
+                   @table[-1])
     end
   end
 
@@ -279,7 +287,8 @@ def setup
   end
 
   test("column name getter") do
-    assert_equal(@visible_column, @table.visible)
+    assert_equal(Arrow::Column.new(@table, 1),
+                 @table.visible)
   end
 
   sub_test_case("#remove_column") do
diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb
index 4707d878e6a..5eee3693c1b 100644
--- a/ruby/red-gandiva/lib/gandiva/version.rb
+++ b/ruby/red-gandiva/lib/gandiva/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Gandiva
-  VERSION = "0.14.0"
+  VERSION = "1.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb
index 2a59809fd13..cc3b9ed5008 100644
--- a/ruby/red-parquet/lib/parquet/version.rb
+++ b/ruby/red-parquet/lib/parquet/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Parquet
-  VERSION = "0.14.0"
+  VERSION = "1.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-parquet/test/test-arrow-table.rb b/ruby/red-parquet/test/test-arrow-table.rb
index 1a565b64451..fc2a6c998c6 100644
--- a/ruby/red-parquet/test/test-arrow-table.rb
+++ b/ruby/red-parquet/test/test-arrow-table.rb
@@ -35,9 +35,7 @@ def setup
     ]
     @count_array = Arrow::ChunkedArray.new(count_arrays)
     @visible_array = Arrow::ChunkedArray.new(visible_arrays)
-    @count_column = Arrow::Column.new(@count_field, @count_array)
-    @visible_column = Arrow::Column.new(@visible_field, @visible_array)
-    @table = Arrow::Table.new(schema, [@count_column, @visible_column])
+    @table = Arrow::Table.new(schema, [@count_array, @visible_array])
   end
 
   def test_save_load_path
diff --git a/ruby/red-plasma/lib/plasma/version.rb b/ruby/red-plasma/lib/plasma/version.rb
index dab013cc780..6aa8fc5b578 100644
--- a/ruby/red-plasma/lib/plasma/version.rb
+++ b/ruby/red-plasma/lib/plasma/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Plasma
-  VERSION = "0.14.0"
+  VERSION = "1.0.0-SNAPSHOT"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 864f2de9403..0ceb135643f 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -17,7 +17,7 @@
 
 [package]
 name = "arrow"
-version = "0.14.0"
+version = "1.0.0-SNAPSHOT"
 description = "Rust implementation of Apache Arrow"
 homepage = "https://github.com/apache/arrow"
 repository = "https://github.com/apache/arrow"
@@ -77,6 +77,10 @@ harness = false
 name = "comparison_kernels"
 harness = false
 
+[[bench]]
+name = "take_kernels"
+harness = false
+
 [[bench]]
 name = "csv_writer"
 harness = false
diff --git a/rust/arrow/benches/arithmetic_kernels.rs b/rust/arrow/benches/arithmetic_kernels.rs
index 855355d9f5c..e9851684702 100644
--- a/rust/arrow/benches/arithmetic_kernels.rs
+++ b/rust/arrow/benches/arithmetic_kernels.rs
@@ -24,7 +24,6 @@ use std::sync::Arc;
 extern crate arrow;
 
 use arrow::array::*;
-use arrow::builder::*;
 use arrow::compute::array_ops::{limit, sum};
 use arrow::compute::kernels::arithmetic::*;
 use arrow::error::Result;
diff --git a/rust/arrow/benches/array_from_vec.rs b/rust/arrow/benches/array_from_vec.rs
index f9357140922..1918e61c913 100644
--- a/rust/arrow/benches/array_from_vec.rs
+++ b/rust/arrow/benches/array_from_vec.rs
@@ -22,7 +22,6 @@ use criterion::Criterion;
 extern crate arrow;
 
 use arrow::array::*;
-use arrow::array_data::ArrayDataBuilder;
 use arrow::buffer::Buffer;
 use arrow::datatypes::*;
diff --git a/rust/arrow/benches/boolean_kernels.rs b/rust/arrow/benches/boolean_kernels.rs
index d01c9df920a..3a544ace4f5 100644
--- a/rust/arrow/benches/boolean_kernels.rs
+++ b/rust/arrow/benches/boolean_kernels.rs
@@ -22,7 +22,6 @@ use criterion::Criterion;
 extern crate arrow;
 
 use arrow::array::*;
-use arrow::builder::*;
 use arrow::compute::kernels::boolean as boolean_kernels;
 use arrow::error::{ArrowError, Result};
diff --git a/rust/arrow/benches/builder.rs b/rust/arrow/benches/builder.rs
index 70369804f87..c13874be60a 100644
--- a/rust/arrow/benches/builder.rs
+++ b/rust/arrow/benches/builder.rs
@@ -25,7 +25,7 @@ use criterion::*;
 use rand::distributions::Standard;
 use rand::{thread_rng, Rng};
 
-use arrow::builder::*;
+use arrow::array::*;
 
 // Build arrays with 512k elements.
 const BATCH_SIZE: usize = 8 << 10;
diff --git a/rust/arrow/benches/comparison_kernels.rs b/rust/arrow/benches/comparison_kernels.rs
index bd75b6ac3f7..77f6d8361d4 100644
--- a/rust/arrow/benches/comparison_kernels.rs
+++ b/rust/arrow/benches/comparison_kernels.rs
@@ -22,7 +22,6 @@ use criterion::Criterion;
 extern crate arrow;
 
 use arrow::array::*;
-use arrow::builder::*;
 use arrow::compute::*;
 
 fn create_array(size: usize) -> Float32Array {
diff --git a/rust/arrow/benches/take_kernels.rs b/rust/arrow/benches/take_kernels.rs
new file mode 100644
index 00000000000..ee420808348
--- /dev/null
+++ b/rust/arrow/benches/take_kernels.rs
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#[macro_use]
+extern crate criterion;
+use criterion::Criterion;
+use rand::distributions::{Distribution, Standard};
+use rand::prelude::random;
+use rand::Rng;
+
+use std::sync::Arc;
+
+extern crate arrow;
+
+use arrow::array::*;
+use arrow::compute::{cast, take};
+use arrow::datatypes::*;
+
+// cast array from specified primitive array type to desired data type
+fn create_numeric<T>(size: usize) -> ArrayRef
+where
+    T: ArrowNumericType,
+    Standard: Distribution<T::Native>,
+    PrimitiveArray<T>: std::convert::From<Vec<T::Native>>,
+{
+    Arc::new(PrimitiveArray::<T>::from(vec![random::<T::Native>(); size])) as ArrayRef
+}
+
+fn create_random_index(size: usize) -> UInt32Array {
+    let mut rng = rand::thread_rng();
+    let ints = Int32Array::from(vec![rng.gen_range(-24i32, size as i32); size]);
+    // cast to u32, conveniently marking negative values as nulls
+    UInt32Array::from(
+        cast(&(Arc::new(ints) as ArrayRef), &DataType::UInt32)
+            .unwrap()
+            .data(),
+    )
+}
+
+fn take_numeric<T>(size: usize, index_len: usize) -> ()
+where
+    T: ArrowNumericType,
+    Standard: Distribution<T::Native>,
+    PrimitiveArray<T>: std::convert::From<Vec<T::Native>>,
+    T::Native: num::NumCast,
+{
+    let array = create_numeric::<T>(size);
+    let index = create_random_index(index_len);
+    criterion::black_box(take(&array, &index, None).unwrap());
+}
+
+fn take_boolean(size: usize, index_len: usize) -> () {
+    let array = Arc::new(BooleanArray::from(vec![random::<bool>(); size])) as ArrayRef;
+    let index = create_random_index(index_len);
+    criterion::black_box(take(&array, &index, None).unwrap());
+}
+
+fn add_benchmark(c: &mut Criterion) {
+    c.bench_function("take u8 256", |b| {
+        b.iter(|| take_numeric::<UInt8Type>(256, 256))
+    });
+    c.bench_function("take u8 512", |b| {
+        b.iter(|| take_numeric::<UInt8Type>(512, 512))
+    });
+    c.bench_function("take u8 1024", |b| {
+        b.iter(|| take_numeric::<UInt8Type>(1024, 1024))
+    });
+    c.bench_function("take i32 256", |b| {
+        b.iter(|| take_numeric::<Int32Type>(256, 256))
+    });
+    c.bench_function("take i32 512", |b| {
+        b.iter(|| take_numeric::<Int32Type>(512, 512))
+    });
+    c.bench_function("take i32 1024", |b| {
+        b.iter(|| take_numeric::<Int32Type>(1024, 1024))
+    });
+    c.bench_function("take bool 256", |b| b.iter(|| take_boolean(256, 256)));
+    c.bench_function("take bool 512", |b| b.iter(|| take_boolean(512, 512)));
+    c.bench_function("take bool 1024", |b| b.iter(|| take_boolean(1024, 1024)));
+}
+
+criterion_group!(benches, add_benchmark);
+criterion_main!(benches);
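The index generator above leans on a behavior noted in its comment: casting `Int32` to `UInt32` marks negative entries as nulls, which `take` then surfaces as null output slots. A standalone sketch of that trick, with hypothetical values:

```rust
use std::sync::Arc;

use arrow::array::{Array, ArrayRef, Int32Array, UInt32Array};
use arrow::compute::cast;
use arrow::datatypes::DataType;

fn main() {
    // -1 has no UInt32 representation, so the cast kernel nulls it out.
    let ints = Int32Array::from(vec![0, -1, 2]);
    let casted = cast(&(Arc::new(ints) as ArrayRef), &DataType::UInt32).unwrap();
    let indices = UInt32Array::from(casted.data());
    assert!(indices.is_null(1));
}
```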
diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs
index f4af117f489..e4e55d06650 100644
--- a/rust/arrow/src/array/array.rs
+++ b/rust/arrow/src/array/array.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use std::any::Any;
-use std::convert::From;
+use std::convert::{From, TryFrom};
 use std::fmt;
 use std::io::Write;
 use std::mem;
@@ -27,6 +27,7 @@ use chrono::prelude::*;
 use super::*;
 use crate::buffer::{Buffer, MutableBuffer};
 use crate::datatypes::*;
+use crate::error::{ArrowError, Result};
 use crate::memory;
 use crate::util::bit_util;
 
@@ -41,7 +42,7 @@ const NANOSECONDS: i64 = 1_000_000_000;
 
 /// Trait for dealing with different types of array at runtime when the type of the
 /// array is not known in advance
-pub trait Array: Send + Sync {
+pub trait Array: Send + Sync + ArrayEqual {
     /// Returns the array as `Any` so that it can be downcast to a specific implementation
     fn as_any(&self) -> &Any;
 
@@ -194,6 +195,45 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
     raw_values: RawPtrBox<T::Native>,
 }
 
+/// Common operations for primitive types, including numeric types and boolean type.
+pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> {
+    fn values(&self) -> Buffer;
+    fn value(&self, i: usize) -> T::Native;
+}
+
+// This is necessary when the caller wants to access `PrimitiveArrayOps`'s methods with
+// `ArrowPrimitiveType`. It doesn't have any implementation as the actual implementations
+// are delegated to those of `ArrowNumericType` and `BooleanType`.
+impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
+    default fn values(&self) -> Buffer {
+        unimplemented!()
+    }
+
+    default fn value(&self, _: usize) -> T::Native {
+        unimplemented!()
+    }
+}
+
+impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
+    fn values(&self) -> Buffer {
+        self.values()
+    }
+
+    fn value(&self, i: usize) -> T::Native {
+        self.value(i)
+    }
+}
+
+impl PrimitiveArrayOps<BooleanType> for BooleanArray {
+    fn values(&self) -> Buffer {
+        self.values()
+    }
+
+    fn value(&self, i: usize) -> bool {
+        self.value(i)
+    }
+}
+
 impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
     fn as_any(&self) -> &Any {
         self
@@ -271,7 +311,6 @@ where
     ///
     /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned.
     /// A valid value is expected, thus the user should first check for validity.
-    /// TODO: extract constants into static variables
     pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
         let v = i64::from(self.value(i));
         match self.data_type() {
@@ -651,6 +690,23 @@ impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> {
     }
 }
 
+/// Common operations for List types, currently `ListArray` and `BinaryArray`.
+pub trait ListArrayOps {
+    fn value_offset_at(&self, i: usize) -> i32;
+}
+
+impl ListArrayOps for ListArray {
+    fn value_offset_at(&self, i: usize) -> i32 {
+        self.value_offset_at(i)
+    }
+}
+
+impl ListArrayOps for BinaryArray {
+    fn value_offset_at(&self, i: usize) -> i32 {
+        self.value_offset_at(i)
+    }
+}
+
 /// A list array where each element is a variable-sized sequence of values with the same
 /// type.
 pub struct ListArray {
@@ -784,6 +840,16 @@ impl BinaryArray {
         self.value_offset_at(i + 1) - self.value_offset_at(i)
     }
 
+    /// Returns a clone of the value offset buffer
+    pub fn value_offsets(&self) -> Buffer {
+        self.data.buffers()[0].clone()
+    }
+
+    /// Returns a clone of the value data buffer
+    pub fn value_data(&self) -> Buffer {
+        self.data.buffers()[1].clone()
+    }
+
     #[inline]
     fn value_offset_at(&self, i: usize) -> i32 {
         unsafe { *self.value_offsets.get().offset(i as isize) }
@@ -831,7 +897,7 @@ impl<'a> From<Vec<&'a str>> for BinaryArray {
     }
 }
 
-impl<'a> From<Vec<&'a [u8]>> for BinaryArray {
+impl From<Vec<&[u8]>> for BinaryArray {
     fn from(v: Vec<&[u8]>) -> Self {
         let mut offsets = Vec::with_capacity(v.len() + 1);
         let mut values = Vec::new();
@@ -851,6 +917,22 @@ impl From<Vec<&[u8]>> for BinaryArray {
     }
 }
 
+impl<'a> TryFrom<Vec<Option<&'a str>>> for BinaryArray {
+    type Error = ArrowError;
+
+    fn try_from(v: Vec<Option<&'a str>>) -> Result<Self> {
+        let mut builder = BinaryBuilder::new(v.len());
+        for val in v {
+            if let Some(s) = val {
+                builder.append_string(s)?;
+            } else {
+                builder.append(false)?;
+            }
+        }
+        Ok(builder.finish())
+    }
+}
+
 /// Creates a `BinaryArray` from `List` array
 impl From<ListArray> for BinaryArray {
     fn from(v: ListArray) -> Self {
@@ -907,6 +989,16 @@ impl StructArray {
     pub fn column(&self, pos: usize) -> &ArrayRef {
         &self.boxed_fields[pos]
     }
+
+    /// Return the number of fields in this struct array
+    pub fn num_columns(&self) -> usize {
+        self.boxed_fields.len()
+    }
+
+    /// Returns the fields of the struct array
+    pub fn columns(&self) -> Vec<&ArrayRef> {
+        self.boxed_fields.iter().collect()
+    }
 }
 
 impl From<Vec<(Field, ArrayRef)>> for StructArray {
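The new `TryFrom<Vec<Option<&str>>>` conversion is a convenient way to build a nullable `BinaryArray` without driving the builder by hand; a minimal sketch:

```rust
use std::convert::TryFrom;

use arrow::array::{Array, BinaryArray};

fn main() {
    // `None` entries become null slots (see the TryFrom impl above).
    let array = BinaryArray::try_from(vec![Some("hello"), None, Some("arrow")]).unwrap();
    assert_eq!(3, array.len());
    assert!(array.is_null(1));
}
```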
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
index b0b97c22107..da0357b4924 100644
--- a/rust/arrow/src/array/builder.rs
+++ b/rust/arrow/src/array/builder.rs
@@ -467,11 +467,21 @@ impl BinaryBuilder {
     ///
     /// Note, when appending individual byte values you must call `append` to delimit each
     /// distinct list value.
-    pub fn append_value(&mut self, value: u8) -> Result<()> {
+    pub fn append_byte(&mut self, value: u8) -> Result<()> {
         self.builder.values().append_value(value)?;
         Ok(())
     }
 
+    /// Appends a byte slice into the builder.
+    ///
+    /// Automatically calls the `append` method to delimit the slice appended in as a
+    /// distinct array element.
+    pub fn append_value(&mut self, value: &[u8]) -> Result<()> {
+        self.builder.values().append_slice(value)?;
+        self.builder.append(true)?;
+        Ok(())
+    }
+
     /// Appends a `&String` or `&str` into the builder.
     ///
     /// Automatically calls the `append` method to delimit the string appended in as a
@@ -1156,18 +1166,18 @@ mod tests {
     fn test_binary_array_builder() {
         let mut builder = BinaryBuilder::new(20);
 
-        builder.append_value(b'h').unwrap();
-        builder.append_value(b'e').unwrap();
-        builder.append_value(b'l').unwrap();
-        builder.append_value(b'l').unwrap();
-        builder.append_value(b'o').unwrap();
+        builder.append_byte(b'h').unwrap();
+        builder.append_byte(b'e').unwrap();
+        builder.append_byte(b'l').unwrap();
+        builder.append_byte(b'l').unwrap();
+        builder.append_byte(b'o').unwrap();
         builder.append(true).unwrap();
         builder.append(true).unwrap();
-        builder.append_value(b'w').unwrap();
-        builder.append_value(b'o').unwrap();
-        builder.append_value(b'r').unwrap();
-        builder.append_value(b'l').unwrap();
-        builder.append_value(b'd').unwrap();
+        builder.append_byte(b'w').unwrap();
+        builder.append_byte(b'o').unwrap();
+        builder.append_byte(b'r').unwrap();
+        builder.append_byte(b'l').unwrap();
+        builder.append_byte(b'd').unwrap();
         builder.append(true).unwrap();
 
         let array = builder.finish();
diff --git a/rust/arrow/src/array/equal.rs b/rust/arrow/src/array/equal.rs
new file mode 100644
index 00000000000..5f888ab5eac
--- /dev/null
+++ b/rust/arrow/src/array/equal.rs
@@ -0,0 +1,741 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use super::*;
+use crate::datatypes::*;
+use crate::util::bit_util;
+
+/// Trait for `Array` equality.
+pub trait ArrayEqual {
+    /// Returns true if this array is equal to the `other` array
+    fn equals(&self, other: &dyn Array) -> bool;
+
+    /// Returns true if the range [start_idx, end_idx) is equal to
+    /// [other_start_idx, other_start_idx + end_idx - start_idx) in the `other` array
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool;
+}
+
+impl<T: ArrowPrimitiveType> ArrayEqual for PrimitiveArray<T> {
+    default fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let value_buf = self.data_ref().buffers()[0].clone();
+        let other_value_buf = other.data_ref().buffers()[0].clone();
+        let byte_width = T::get_bit_width() / 8;
+
+        if self.null_count() > 0 {
+            let values = value_buf.data();
+            let other_values = other_value_buf.data();
+
+            for i in 0..self.len() {
+                if self.is_valid(i) {
+                    let start = (i + self.offset()) * byte_width;
+                    let data = &values[start..(start + byte_width)];
+                    let other_start = (i + other.offset()) * byte_width;
+                    let other_data =
+                        &other_values[other_start..(other_start + byte_width)];
+                    if data != other_data {
+                        return false;
+                    }
+                }
+            }
+        } else {
+            let start = self.offset() * byte_width;
+            let other_start = other.offset() * byte_width;
+            let len = self.len() * byte_width;
+            let data = &value_buf.data()[start..(start + len)];
+            let other_data = &other_value_buf.data()[other_start..(other_start + len)];
+            if data != other_data {
+                return false;
+            }
+        }
+
+        true
+    }
+
+    default fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap();
+
+        let mut j = other_start_idx;
+        for i in start_idx..end_idx {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+            if is_null != other_is_null || (!is_null && self.value(i) != other.value(j)) {
+                return false;
+            }
+            j += 1;
+        }
+
+        true
+    }
+}
+
+impl ArrayEqual for BooleanArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let values = self.data_ref().buffers()[0].data();
+        let other_values = other.data_ref().buffers()[0].data();
+
+        // TODO: we can do this more efficiently if all values are not-null
+        for i in 0..self.len() {
+            if self.is_valid(i) {
+                if bit_util::get_bit(values, i + self.offset())
+                    != bit_util::get_bit(other_values, i + other.offset())
+                {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+}
+
+impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
+    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
+        self.equals(other)
+    }
+}
+
+impl ArrayEqual for ListArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<ListArray>().unwrap();
+
+        if !value_offset_equal(self, other) {
+            return false;
+        }
+
+        if !self.values().range_equals(
+            &*other.values(),
+            self.value_offset(0) as usize,
+            self.value_offset(self.len()) as usize,
+            other.value_offset(0) as usize,
+        ) {
+            return false;
+        }
+
+        true
+    }
+
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<ListArray>().unwrap();
+
+        let mut j = other_start_idx;
+        for i in start_idx..end_idx {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+
+            let start_offset = self.value_offset(i) as usize;
+            let end_offset = self.value_offset(i + 1) as usize;
+            let other_start_offset = other.value_offset(j) as usize;
+            let other_end_offset = other.value_offset(j + 1) as usize;
+
+            if end_offset - start_offset != other_end_offset - other_start_offset {
+                return false;
+            }
+
+            if !self.values().range_equals(
+                &*other.values(),
+                start_offset,
+                end_offset,
+                other_start_offset,
+            ) {
+                return false;
+            }
+
+            j += 1;
+        }
+
+        true
+    }
+}
+
+impl ArrayEqual for BinaryArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<BinaryArray>().unwrap();
+
+        if !value_offset_equal(self, other) {
+            return false;
+        }
+
+        // TODO: handle null & length == 0 case?
+
+        let value_buf = self.value_data();
+        let other_value_buf = other.value_data();
+        let value_data = value_buf.data();
+        let other_value_data = other_value_buf.data();
+
+        if self.null_count() == 0 {
+            // No offset in both - just do memcmp
+            if self.offset() == 0 && other.offset() == 0 {
+                let len = self.value_offset(self.len()) as usize;
+                return value_data[..len] == other_value_data[..len];
+            } else {
+                let start = self.value_offset(0) as usize;
+                let other_start = other.value_offset(0) as usize;
+                let len = (self.value_offset(self.len()) - self.value_offset(0)) as usize;
+                return value_data[start..(start + len)]
+                    == other_value_data[other_start..(other_start + len)];
+            }
+        } else {
+            for i in 0..self.len() {
+                if self.is_null(i) {
+                    continue;
+                }
+
+                let start = self.value_offset(i) as usize;
+                let other_start = other.value_offset(i) as usize;
+                let len = self.value_length(i) as usize;
+                if value_data[start..(start + len)]
+                    != other_value_data[other_start..(other_start + len)]
+                {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<BinaryArray>().unwrap();
+
+        let mut j = other_start_idx;
+        for i in start_idx..end_idx {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(j);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+
+            let start_offset = self.value_offset(i) as usize;
+            let end_offset = self.value_offset(i + 1) as usize;
+            let other_start_offset = other.value_offset(j) as usize;
+            let other_end_offset = other.value_offset(j + 1) as usize;
+
+            if end_offset - start_offset != other_end_offset - other_start_offset {
+                return false;
+            }
+
+            let value_buf = self.value_data();
+            let other_value_buf = other.value_data();
+            let value_data = value_buf.data();
+            let other_value_data = other_value_buf.data();
+
+            if end_offset - start_offset > 0 {
+                let len = end_offset - start_offset;
+                if value_data[start_offset..(start_offset + len)]
+                    != other_value_data[other_start_offset..(other_start_offset + len)]
+                {
+                    return false;
+                }
+            }
+
+            j += 1;
+        }
+
+        true
+    }
+}
+
+impl ArrayEqual for StructArray {
+    fn equals(&self, other: &dyn Array) -> bool {
+        if !base_equal(&self.data(), &other.data()) {
+            return false;
+        }
+
+        let other = other.as_any().downcast_ref::<StructArray>().unwrap();
+
+        for i in 0..self.len() {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(i);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+            for j in 0..self.num_columns() {
+                if !self.column(j).range_equals(&**other.column(j), i, i + 1, i) {
+                    return false;
+                }
+            }
+        }
+
+        true
+    }
+
+    fn range_equals(
+        &self,
+        other: &dyn Array,
+        start_idx: usize,
+        end_idx: usize,
+        other_start_idx: usize,
+    ) -> bool {
+        assert!(other_start_idx + (end_idx - start_idx) <= other.len());
+        let other = other.as_any().downcast_ref::<StructArray>().unwrap();
+
+        let mut j = other_start_idx;
+        for i in start_idx..end_idx {
+            let is_null = self.is_null(i);
+            let other_is_null = other.is_null(i);
+
+            if is_null != other_is_null {
+                return false;
+            }
+
+            if is_null {
+                continue;
+            }
+            for k in 0..self.num_columns() {
+                if !self.column(k).range_equals(&**other.column(k), i, i + 1, j) {
+                    return false;
+                }
+            }
+
+            j += 1;
+        }
+
+        true
+    }
+}
+
+// Compare if the common basic fields between the two arrays are equal
+fn base_equal(this: &ArrayDataRef, other: &ArrayDataRef) -> bool {
+    if this.data_type() != other.data_type() {
+        return false;
+    }
+    if this.len != other.len {
+        return false;
+    }
+    if this.null_count != other.null_count {
+        return false;
+    }
+    if this.null_count > 0 {
+        let null_bitmap = this.null_bitmap().as_ref().unwrap();
+        let other_null_bitmap = other.null_bitmap().as_ref().unwrap();
+        let null_buf = null_bitmap.bits.data();
+        let other_null_buf = other_null_bitmap.bits.data();
+        for i in 0..this.len() {
+            if bit_util::get_bit(null_buf, i + this.offset())
+                != bit_util::get_bit(other_null_buf, i + other.offset())
+            {
+                return false;
+            }
+        }
+    }
+    true
+}
+
+// Compare if the value offsets are equal between the two list arrays
+fn value_offset_equal<T: Array + ListArrayOps>(this: &T, other: &T) -> bool {
+    // Check if offsets differ
+    if this.offset() == 0 && other.offset() == 0 {
+        let offset_data = &this.data_ref().buffers()[0];
+        let other_offset_data = &other.data_ref().buffers()[0];
+        return offset_data.data()[0..((this.len() + 1) * 4)]
+            == other_offset_data.data()[0..((other.len() + 1) * 4)];
+    }
+
+    // The expensive case
+    for i in 0..this.len() + 1 {
+        if this.value_offset_at(i) - this.value_offset_at(0)
+            != other.value_offset_at(i) - other.value_offset_at(0)
+        {
+            return false;
+        }
+    }
+
+    true
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use std::convert::TryFrom;
+
+    use crate::error::Result;
+
+    #[test]
+    fn test_primitive_equal() {
+        let a = Int32Array::from(vec![1, 2, 3]);
+        let b = Int32Array::from(vec![1, 2, 3]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = Int32Array::from(vec![1, 2, 4]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+        let b = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = Int32Array::from(vec![Some(1), None, None, Some(3)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = Int32Array::from(vec![Some(1), None, Some(2), Some(4)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a_slice = a.slice(1, 2);
+        let b_slice = b.slice(1, 2);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_boolean_equal() {
+        let a = BooleanArray::from(vec![false, false, true]);
+        let b = BooleanArray::from(vec![false, false, true]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BooleanArray::from(vec![false, false, false]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
+        let b = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BooleanArray::from(vec![None, None, None, Some(true)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = BooleanArray::from(vec![Some(true), None, None, Some(true)]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a = BooleanArray::from(vec![false, true, false, true, false, false, true]);
+        let b = BooleanArray::from(vec![false, false, false, true, false, true, true]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let a_slice = a.slice(2, 3);
+        let b_slice = b.slice(2, 3);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(3, 4);
+        let b_slice = b.slice(3, 4);
+        assert!(!a_slice.equals(&*b_slice));
+        assert!(!b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_list_equal() {
+        let mut a_builder = ListBuilder::new(Int32Builder::new(10));
+        let mut b_builder = ListBuilder::new(Int32Builder::new(10));
+
+        let a = create_list_array(&mut a_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 6])])
+            .unwrap();
+        let b = create_list_array(&mut b_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 6])])
+            .unwrap();
+
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = create_list_array(&mut a_builder, &[Some(&[1, 2, 3]), Some(&[4, 5, 7])])
+            .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = create_list_array(
+            &mut a_builder,
+            &[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None],
+        )
+        .unwrap();
+        let b = create_list_array(
+            &mut a_builder,
+            &[Some(&[1, 2]), None, None, Some(&[3, 4]), None, None],
+        )
+        .unwrap();
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = create_list_array(
+            &mut a_builder,
+            &[
+                Some(&[1, 2]),
+                None,
+                Some(&[5, 6]),
+                Some(&[3, 4]),
+                None,
+                None,
+            ],
+        )
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = create_list_array(
+            &mut a_builder,
+            &[Some(&[1, 2]), None, None, Some(&[3, 5]), None, None],
+        )
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a_slice = a.slice(0, 3);
+        let b_slice = b.slice(0, 3);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(0, 5);
+        let b_slice = b.slice(0, 5);
+        assert!(!a_slice.equals(&*b_slice));
+        assert!(!b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(4, 1);
+        let b_slice = b.slice(4, 1);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_binary_equal() {
+        let a = BinaryArray::from(vec!["hello", "world"]);
+        let b = BinaryArray::from(vec!["hello", "world"]);
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BinaryArray::from(vec!["hello", "arrow"]);
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where null_count > 0
+
+        let a = BinaryArray::try_from(vec![
+            Some("hello"),
+            None,
+            None,
+            Some("world"),
+            None,
+            None,
+        ])
+        .unwrap();
+
+        let b = BinaryArray::try_from(vec![
+            Some("hello"),
+            None,
+            None,
+            Some("world"),
+            None,
+            None,
+        ])
+        .unwrap();
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+
+        let b = BinaryArray::try_from(vec![
+            Some("hello"),
+            Some("foo"),
+            None,
+            Some("world"),
+            None,
+            None,
+        ])
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        let b = BinaryArray::try_from(vec![
+            Some("hello"),
+            None,
+            None,
+            Some("arrow"),
+            None,
+            None,
+        ])
+        .unwrap();
+        assert!(!a.equals(&b));
+        assert!(!b.equals(&a));
+
+        // Test the case where offset != 0
+
+        let a_slice = a.slice(0, 3);
+        let b_slice = b.slice(0, 3);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(0, 5);
+        let b_slice = b.slice(0, 5);
+        assert!(!a_slice.equals(&*b_slice));
+        assert!(!b_slice.equals(&*a_slice));
+
+        let a_slice = a.slice(4, 1);
+        let b_slice = b.slice(4, 1);
+        assert!(a_slice.equals(&*b_slice));
+        assert!(b_slice.equals(&*a_slice));
+    }
+
+    #[test]
+    fn test_struct_equal() {
+        let string_builder = BinaryBuilder::new(5);
+        let int_builder = Int32Builder::new(5);
+
+        let mut fields = Vec::new();
+        let mut field_builders = Vec::new();
+        fields.push(Field::new("f1", DataType::Utf8, false));
+        field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>);
+        fields.push(Field::new("f2", DataType::Int32, false));
+        field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>);
+
+        let mut builder = StructBuilder::new(fields, field_builders);
+
+        let a = create_struct_array(
+            &mut builder,
+            &[Some("joe"), None, None, Some("mark"), Some("doe")],
+            &[Some(1), Some(2), None, Some(4), Some(5)],
+            &[true, true, false, true, true],
+        )
+        .unwrap();
+        let b = create_struct_array(
+            &mut builder,
+            &[Some("joe"), None, None, Some("mark"), Some("doe")],
+            &[Some(1), Some(2), None, Some(4), Some(5)],
+            &[true, true, false, true, true],
+        )
+        .unwrap();
+
+        assert!(a.equals(&b));
+        assert!(b.equals(&a));
+    }
+
+    fn create_list_array<'a, U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(
+        builder: &'a mut ListBuilder<Int32Builder>,
+        data: T,
+    ) -> Result<ListArray> {
+        for d in data.as_ref() {
+            if let Some(v) = d {
+                builder.values().append_slice(v.as_ref())?;
+                builder.append(true)?
+            } else {
+                builder.append(false)?
+            }
+        }
+        Ok(builder.finish())
+    }
+
+    fn create_struct_array<
+        'a,
+        T: AsRef<[Option<&'a str>]>,
+        U: AsRef<[Option<i32>]>,
+        V: AsRef<[bool]>,
+    >(
+        builder: &'a mut StructBuilder,
+        first: T,
+        second: U,
+        is_valid: V,
+    ) -> Result<StructArray> {
+        let string_builder = builder.field_builder::<BinaryBuilder>(0).unwrap();
+        for v in first.as_ref() {
+            if let Some(s) = v {
+                string_builder.append_string(s)?;
+            } else {
+                string_builder.append_null()?;
+            }
+        }
+
+        let int_builder = builder.field_builder::<Int32Builder>(1).unwrap();
+        for v in second.as_ref() {
+            if let Some(i) = v {
+                int_builder.append_value(*i)?;
+            } else {
+                int_builder.append_null()?;
+            }
+        }
+
+        for v in is_valid.as_ref() {
+            builder.append(*v)?
+        }
+
+        Ok(builder.finish())
+    }
+}
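Because `ArrayEqual` is now a supertrait of `Array`, logical equality is available on trait objects and slices alike; a short sketch based on the tests above:

```rust
use arrow::array::{Array, ArrayEqual, Int32Array};

fn main() {
    let a = Int32Array::from(vec![Some(1), None, Some(3)]);
    let b = Int32Array::from(vec![Some(1), None, Some(3)]);
    assert!(a.equals(&b));

    // Slices compare by logical position, independent of buffer offsets.
    let a_slice = a.slice(1, 2);
    let b_slice = b.slice(1, 2);
    assert!(a_slice.equals(&*b_slice));
}
```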
TimestampMicrosecondArray = PrimitiveArray; -pub type TimestampNanosecondArray = PrimitiveArray; -pub type Date32Array = PrimitiveArray; -pub type Date64Array = PrimitiveArray; -pub type Time32SecondArray = PrimitiveArray; -pub type Time32MillisecondArray = PrimitiveArray; -pub type Time64MicrosecondArray = PrimitiveArray; -pub type Time64NanosecondArray = PrimitiveArray; -// TODO add interval +pub use self::equal::ArrayEqual; diff --git a/rust/arrow/src/compute/kernels/mod.rs b/rust/arrow/src/compute/kernels/mod.rs index 2483f519b97..ae1ab0cc45d 100644 --- a/rust/arrow/src/compute/kernels/mod.rs +++ b/rust/arrow/src/compute/kernels/mod.rs @@ -21,4 +21,5 @@ pub mod arithmetic; pub mod boolean; pub mod cast; pub mod comparison; +pub mod take; pub mod temporal; diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs new file mode 100644 index 00000000000..6cce7fb47d9 --- /dev/null +++ b/rust/arrow/src/compute/kernels/take.rs @@ -0,0 +1,595 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines take kernel for `ArrayRef` + +use std::sync::Arc; + +use crate::array::*; +use crate::buffer::{Buffer, MutableBuffer}; +use crate::compute::util::take_value_indices_from_list; +use crate::datatypes::*; +use crate::error::{ArrowError, Result}; +use crate::util::bit_util; + +use TimeUnit::*; + +/// Take elements from `ArrayRef` by supplying an array of indices. 
diff --git a/rust/arrow/src/compute/kernels/mod.rs b/rust/arrow/src/compute/kernels/mod.rs index 2483f519b97..ae1ab0cc45d 100644 --- a/rust/arrow/src/compute/kernels/mod.rs +++ b/rust/arrow/src/compute/kernels/mod.rs @@ -21,4 +21,5 @@ pub mod arithmetic; pub mod boolean; pub mod cast; pub mod comparison; +pub mod take; pub mod temporal; diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs new file mode 100644 index 00000000000..6cce7fb47d9 --- /dev/null +++ b/rust/arrow/src/compute/kernels/take.rs @@ -0,0 +1,595 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines take kernel for `ArrayRef` + +use std::sync::Arc; + +use crate::array::*; +use crate::buffer::{Buffer, MutableBuffer}; +use crate::compute::util::take_value_indices_from_list; +use crate::datatypes::*; +use crate::error::{ArrowError, Result}; +use crate::util::bit_util; + +use TimeUnit::*; + +/// Take elements from `ArrayRef` by supplying an array of indices. +/// +/// Supports: +/// * null indices, returning a null value for the index +/// * checking for overflowing indices +pub fn take( + values: &ArrayRef, + indices: &UInt32Array, + options: Option<TakeOptions>, +) -> Result<ArrayRef> { + let options = options.unwrap_or_default(); + if options.check_bounds { + let len = values.len(); + for i in 0..indices.len() { + if indices.is_valid(i) { + let ix = indices.value(i) as usize; + if ix >= len { + return Err(ArrowError::ComputeError( + format!("Array index out of bounds, cannot get item at index {} from {} entries", ix, len)) + ); + } + } + } + } + match values.data_type() { + DataType::Boolean => take_primitive::<BooleanType>(values, indices), + DataType::Int8 => take_primitive::<Int8Type>(values, indices), + DataType::Int16 => take_primitive::<Int16Type>(values, indices), + DataType::Int32 => take_primitive::<Int32Type>(values, indices), + DataType::Int64 => take_primitive::<Int64Type>(values, indices), + DataType::UInt8 => take_primitive::<UInt8Type>(values, indices), + DataType::UInt16 => take_primitive::<UInt16Type>(values, indices), + DataType::UInt32 => take_primitive::<UInt32Type>(values, indices), + DataType::UInt64 => take_primitive::<UInt64Type>(values, indices), + DataType::Float32 => take_primitive::<Float32Type>(values, indices), + DataType::Float64 => take_primitive::<Float64Type>(values, indices), + DataType::Date32(_) => take_primitive::<Date32Type>(values, indices), + DataType::Date64(_) => take_primitive::<Date64Type>(values, indices), + DataType::Time32(Second) => take_primitive::<Time32SecondType>(values, indices), + DataType::Time32(Millisecond) => { + take_primitive::<Time32MillisecondType>(values, indices) + } + DataType::Time64(Microsecond) => { + take_primitive::<Time64MicrosecondType>(values, indices) + } + DataType::Time64(Nanosecond) => { + take_primitive::<Time64NanosecondType>(values, indices) + } + DataType::Timestamp(Second) => { + take_primitive::<TimestampSecondType>(values, indices) + } + DataType::Timestamp(Millisecond) => { + take_primitive::<TimestampMillisecondType>(values, indices) + } + DataType::Timestamp(Microsecond) => { + take_primitive::<TimestampMicrosecondType>(values, indices) + } + DataType::Timestamp(Nanosecond) => { + take_primitive::<TimestampNanosecondType>(values, indices) + } + DataType::Utf8 => take_binary(values, indices), + DataType::List(_) => take_list(values, indices), + DataType::Struct(fields) => { + let struct_: &StructArray = + values.as_any().downcast_ref::<StructArray>().unwrap(); + let arrays: Result<Vec<ArrayRef>> = struct_ + .columns() + .iter() + .map(|a| take(a, indices, Some(options.clone()))) + .collect(); + let arrays = arrays?; + let pairs: Vec<(Field, ArrayRef)> = + fields.clone().into_iter().zip(arrays).collect(); + Ok(Arc::new(StructArray::from(pairs)) as ArrayRef) + } + t => unimplemented!("Take not supported for data type {:?}", t), + } +} + +/// Options that define how `take` should behave +#[derive(Clone)] +pub struct TakeOptions { + /// Perform bounds check before taking indices from values. + /// If enabled, an `ArrowError` is returned if the indices are out of bounds. + /// If not enabled, and indices exceed bounds, the kernel will panic. + pub check_bounds: bool, +} + +impl Default for TakeOptions { + fn default() -> Self { + Self { + check_bounds: false, + } + } +} + +/// `take` implementation for primitive arrays +/// +/// This checks if an `indices` slot is populated, and gets the value from `values` +/// at the populated index. +/// If the `indices` slot is null, a null value is returned.
+/// For example, given: +/// values: [1, 2, 3, null, 5] +/// indices: [0, null, 4, 3] +/// The result is: [1 (slot 0), null (null slot), 5 (slot 4), null (slot 3)] +fn take_primitive<T>(values: &ArrayRef, indices: &UInt32Array) -> Result<ArrayRef> +where + T: ArrowPrimitiveType, +{ + let mut builder = PrimitiveBuilder::<T>::new(indices.len()); + let a = values.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap(); + for i in 0..indices.len() { + if indices.is_null(i) { + // populate with null if index is null + builder.append_null()?; + } else { + // get index value to use in looking up the value from `values` + let ix = indices.value(i) as usize; + if a.is_valid(ix) { + builder.append_value(a.value(ix))?; + } else { + builder.append_null()?; + } + } + } + Ok(Arc::new(builder.finish()) as ArrayRef) +} + +/// `take` implementation for binary arrays +fn take_binary(values: &ArrayRef, indices: &UInt32Array) -> Result<ArrayRef> { + let mut builder = BinaryBuilder::new(indices.len()); + let a = values.as_any().downcast_ref::<BinaryArray>().unwrap(); + for i in 0..indices.len() { + if indices.is_null(i) { + builder.append(false)?; + } else { + let ix = indices.value(i) as usize; + if a.is_null(ix) { + builder.append(false)?; + } else { + builder.append_value(a.value(ix))?; + } + } + } + Ok(Arc::new(builder.finish()) as ArrayRef) +} + +/// `take` implementation for list arrays +/// +/// Calculates the index and indexed offset for the inner array, +/// applying `take` on the inner array, then reconstructing a list array +/// with the indexed offsets +fn take_list(values: &ArrayRef, indices: &UInt32Array) -> Result<ArrayRef> { + // TODO: Some optimizations can be done here such as if it is + // taking the whole list or a contiguous sublist + let list: &ListArray = values.as_any().downcast_ref::<ListArray>().unwrap(); + let (list_indices, offsets) = take_value_indices_from_list(values, indices); + let taken = take(&list.values(), &list_indices, None)?; + // determine null count and null buffer, which are a function of `values` and `indices` + let mut null_count = 0; + let num_bytes = bit_util::ceil(indices.len(), 8); + let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); + { + let null_slice = null_buf.data_mut(); + offsets[..]
+ .windows(2) + .enumerate() + .for_each(|(i, window): (usize, &[i32])| { + if window[0] != window[1] { + // offsets are unequal, slot is not null + bit_util::set_bit(null_slice, i); + } else { + null_count += 1; + } + }); + } + let value_offsets = Buffer::from(offsets[..].to_byte_slice()); + // create a new list with taken data and computed null information + let list_data = ArrayDataBuilder::new(list.data_type().clone()) + .len(indices.len()) + .null_count(null_count) + .null_bit_buffer(null_buf.freeze()) + .offset(0) + .add_child_data(taken.data()) + .add_buffer(value_offsets) + .build(); + let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef; + Ok(list_array) +}
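The offset-windowing step above is the subtle part of `take_list`: a slot in the rebuilt list is marked null exactly when its start and end offsets coincide. Note that equal offsets can also arise from a genuinely empty child list, which this rule then treats as null. A standalone sketch of just that rule (the function name is illustrative, not part of the patch):

```rust
/// Derive per-slot validity from list offsets: a slot is valid only
/// when its window spans at least one value, mirroring the
/// `windows(2)` comparison in `take_list` above.
fn validity_from_offsets(offsets: &[i32]) -> Vec<bool> {
    offsets.windows(2).map(|w| w[0] != w[1]).collect()
}

fn main() {
    // Offsets [0, 2, 2, 5]: the middle slot spans no values, so it is null.
    assert_eq!(
        validity_from_offsets(&[0, 2, 2, 5]),
        vec![true, false, true]
    );
}
```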
+ +#[cfg(test)] +mod tests { + use super::*; + + fn test_take_primitive_arrays<'a, T>( + data: Vec<Option<T::Native>>, + index: &UInt32Array, + options: Option<TakeOptions>, + expected_data: Vec<Option<T::Native>>, + ) where + T: ArrowPrimitiveType, + PrimitiveArray<T>: From<Vec<Option<T::Native>>> + ArrayEqual, + { + let output = PrimitiveArray::<T>::from(data); + let expected = PrimitiveArray::<T>::from(expected_data); + let output = take(&(Arc::new(output) as ArrayRef), index, options).unwrap(); + let output = output.as_any().downcast_ref::<PrimitiveArray<T>>().unwrap(); + assert!(output.equals(&expected)) + } + + // create a simple struct for testing purposes + fn create_test_struct() -> ArrayRef { + let boolean_data = BooleanArray::from(vec![true, false, false, true]).data(); + let int_data = Int32Array::from(vec![42, 28, 19, 31]).data(); + let mut field_types = vec![]; + field_types.push(Field::new("a", DataType::Boolean, true)); + field_types.push(Field::new("b", DataType::Int32, true)); + let struct_array_data = ArrayData::builder(DataType::Struct(field_types)) + .len(4) + .null_count(0) + .add_child_data(boolean_data) + .add_child_data(int_data) + .build(); + let struct_array = StructArray::from(struct_array_data); + Arc::new(struct_array) as ArrayRef + } + + #[test] + fn test_take_primitive() { + let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]); + + // uint8 + test_take_primitive_arrays::<UInt8Type>( + vec![Some(0), None, Some(2), Some(3), None], + &index, + None, + vec![Some(3), None, None, Some(3), Some(2)], + ); + + // uint16 + test_take_primitive_arrays::<UInt16Type>( + vec![Some(0), None, Some(2), Some(3), None], + &index, + None, + vec![Some(3), None, None, Some(3), Some(2)], + ); + + // uint32 + test_take_primitive_arrays::<UInt32Type>( + vec![Some(0), None, Some(2), Some(3), None], + &index, + None, + vec![Some(3), None, None, Some(3), Some(2)], + ); + + // int64 + test_take_primitive_arrays::<Int64Type>( + vec![Some(0), None, Some(2), Some(-15), None], + &index, + None, + vec![Some(-15), None, None, Some(-15), Some(2)], + ); + + // float32 + test_take_primitive_arrays::<Float32Type>( + vec![Some(0.0), None, Some(2.21), Some(-3.1), None], + &index, + None, + vec![Some(-3.1), None, None, Some(-3.1), Some(2.21)], + ); + + // float64 + test_take_primitive_arrays::<Float64Type>( + vec![Some(0.0), None, Some(2.21), Some(-3.1), None], + &index, + None, + vec![Some(-3.1), None, None, Some(-3.1), Some(2.21)], + ); + + // boolean + test_take_primitive_arrays::<BooleanType>( + vec![Some(false), None, Some(true), Some(false), None], + &index, + None, + vec![Some(false), None, None, Some(false), Some(true)], + ); + } + + #[test] + fn test_take_binary() { + let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(4)]); + let mut builder: BinaryBuilder = BinaryBuilder::new(6); + builder.append_string("one").unwrap(); + builder.append_null().unwrap(); + builder.append_string("three").unwrap(); + builder.append_string("four").unwrap(); + builder.append_string("five").unwrap(); + let array = Arc::new(builder.finish()) as ArrayRef; + let a = take(&array, &index, None).unwrap(); + assert_eq!(a.len(), index.len()); + builder.append_string("four").unwrap(); + builder.append_null().unwrap(); + builder.append_null().unwrap(); + builder.append_string("four").unwrap(); + builder.append_string("five").unwrap(); + let b = builder.finish(); + assert_eq!(a.data(), b.data()); + } + + #[test] + fn test_take_list() { + // Construct a value array, [[0,0,0], [-1,-2,-1], [2,3]] + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 3]).data(); + // Construct offsets + let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice()); + // Construct a list array from the above two + let list_data_type = DataType::List(Box::new(DataType::Int32)); + let list_data = ArrayData::builder(list_data_type.clone()) + .len(3) + .add_buffer(value_offsets) + .add_child_data(value_data) + .build(); + let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef; + + // index returns: [[2,3], null, [-1,-2,-1], [2,3], [0,0,0]] + let index = UInt32Array::from(vec![Some(2), None, Some(1), Some(2), Some(0)]); + + let a = take(&list_array, &index, None).unwrap(); + let a: &ListArray = a.as_any().downcast_ref::<ListArray>().unwrap(); + + // construct a value array with expected results: + // [[2,3], null, [-1,-2,-1], [2,3], [0,0,0]] + let expected_data = Int32Array::from(vec![ + Some(2), + Some(3), + Some(-1), + Some(-2), + Some(-1), + Some(2), + Some(3), + Some(0), + Some(0), + Some(0), + ]) + .data(); + // construct offsets + let expected_offsets = Buffer::from(&[0, 2, 2, 5, 7, 10].to_byte_slice()); + // construct list array from the two + let expected_list_data = ArrayData::builder(list_data_type.clone()) + .len(5) + .null_count(1) + // null buffer remains the same as only the indices have nulls + .null_bit_buffer(index.data().null_bitmap().as_ref().unwrap().bits.clone()) + .add_buffer(expected_offsets) + .add_child_data(expected_data) + .build(); + let expected_list_array = ListArray::from(expected_list_data); + + assert!(a.equals(&expected_list_array)); + } + + #[test] + fn test_take_list_with_value_nulls() { + // Construct a value array, [[0,null,0], [-1,-2,3], [null], [5,null]] + let value_data = Int32Array::from(vec![ + Some(0), + None, + Some(0), + Some(-1), + Some(-2), + Some(3), + None, + Some(5), + None, + ]) + .data(); + // Construct offsets + let value_offsets = Buffer::from(&[0, 3, 6, 7, 9].to_byte_slice()); + // Construct a list array from the above two + let list_data_type = DataType::List(Box::new(DataType::Int32)); + let list_data = ArrayData::builder(list_data_type.clone()) + .len(4) + .add_buffer(value_offsets) + .null_count(0) + .null_bit_buffer(Buffer::from([0b10111101, 0b00000000])) + .add_child_data(value_data) + .build(); + let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef; + + // index returns: [[null], null, [-1,-2,3], [5,null], [0,null,0]] + let index = UInt32Array::from(vec![Some(2), None, Some(1), Some(3), Some(0)]); + + let a = take(&list_array, &index, None).unwrap(); + let a: &ListArray = a.as_any().downcast_ref::<ListArray>().unwrap(); + + // construct a value array with expected results: + // [[null], null, [-1,-2,3], [5,null], [0,null,0]] + let expected_data = Int32Array::from(vec![ + None, + Some(-1), + Some(-2), + Some(3), + Some(5), + None, + Some(0), + None, + Some(0), + ]) + .data(); + // construct offsets + let expected_offsets = Buffer::from(&[0, 1, 1, 4, 6, 9].to_byte_slice()); + // construct list array from the two
+ let expected_list_data = ArrayData::builder(list_data_type.clone()) + .len(5) + .null_count(1) + // null buffer remains the same as only the indices have nulls + .null_bit_buffer(index.data().null_bitmap().as_ref().unwrap().bits.clone()) + .add_buffer(expected_offsets) + .add_child_data(expected_data) + .build(); + let expected_list_array = ListArray::from(expected_list_data); + + assert!(a.equals(&expected_list_array)); + } + + #[test] + fn test_take_list_with_list_nulls() { + // Construct a value array, [[0,null,0], [-1,-2,3], null, [5,null]] + let value_data = Int32Array::from(vec![ + Some(0), + None, + Some(0), + Some(-1), + Some(-2), + Some(3), + Some(5), + None, + ]) + .data(); + // Construct offsets + let value_offsets = Buffer::from(&[0, 3, 6, 6, 8].to_byte_slice()); + // Construct a list array from the above two + let list_data_type = DataType::List(Box::new(DataType::Int32)); + let list_data = ArrayData::builder(list_data_type.clone()) + .len(4) + .add_buffer(value_offsets) + .null_count(1) + .null_bit_buffer(Buffer::from([0b01111101])) + .add_child_data(value_data) + .build(); + let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef; + + // index returns: [null, null, [-1,-2,3], [5,null], [0,null,0]] + let index = UInt32Array::from(vec![Some(2), None, Some(1), Some(3), Some(0)]); + + let a = take(&list_array, &index, None).unwrap(); + let a: &ListArray = a.as_any().downcast_ref::<ListArray>().unwrap(); + + // construct a value array with expected results: + // [null, null, [-1,-2,3], [5,null], [0,null,0]] + let expected_data = Int32Array::from(vec![ + Some(-1), + Some(-2), + Some(3), + Some(5), + None, + Some(0), + None, + Some(0), + ]) + .data(); + // construct offsets + let expected_offsets = Buffer::from(&[0, 0, 0, 3, 5, 8].to_byte_slice()); + // construct list array from the two + let mut null_bits: [u8; 1] = [0; 1]; + bit_util::set_bit(&mut null_bits, 2); + bit_util::set_bit(&mut null_bits, 3); + bit_util::set_bit(&mut null_bits, 4); + let expected_list_data = ArrayData::builder(list_data_type.clone()) + .len(5) + .null_count(2) + // null buffer must be recalculated as both values and indices have nulls + .null_bit_buffer(Buffer::from(null_bits)) + .add_buffer(expected_offsets) + .add_child_data(expected_data) + .build(); + let expected_list_array = ListArray::from(expected_list_data); + + assert!(a.equals(&expected_list_array)); + } + + #[test] + fn test_take_struct() { + let array = create_test_struct(); + + let index = UInt32Array::from(vec![0, 3, 1, 0, 2]); + let a = take(&array, &index, None).unwrap(); + let a: &StructArray = a.as_any().downcast_ref::<StructArray>().unwrap(); + assert_eq!(index.len(), a.len()); + assert_eq!(0, a.null_count()); + + let expected_bool_data = + BooleanArray::from(vec![true, true, false, true, false]).data(); + let expected_int_data = Int32Array::from(vec![42, 31, 28, 42, 19]).data(); + let mut field_types = vec![]; + field_types.push(Field::new("a", DataType::Boolean, true)); + field_types.push(Field::new("b", DataType::Int32, true)); + let struct_array_data = ArrayData::builder(DataType::Struct(field_types)) + .len(5) + .null_count(0) + .add_child_data(expected_bool_data) + .add_child_data(expected_int_data) + .build(); + let struct_array = StructArray::from(struct_array_data); + assert!(a.equals(&struct_array)); + } + + #[test] + fn test_take_struct_with_nulls() { + let array = create_test_struct(); + + let index = UInt32Array::from(vec![None, Some(3), Some(1), None, Some(0)]); + let a = take(&array, &index, None).unwrap(); + let a: &StructArray = a.as_any().downcast_ref::<StructArray>().unwrap();
+ assert_eq!(index.len(), a.len()); + assert_eq!(0, a.null_count()); + + let expected_bool_data = + BooleanArray::from(vec![None, Some(true), Some(false), None, Some(true)]) + .data(); + let expected_int_data = + Int32Array::from(vec![None, Some(31), Some(28), None, Some(42)]).data(); + + let mut field_types = vec![]; + field_types.push(Field::new("a", DataType::Boolean, true)); + field_types.push(Field::new("b", DataType::Int32, true)); + let struct_array_data = ArrayData::builder(DataType::Struct(field_types)) + .len(5) + // TODO: see https://issues.apache.org/jira/browse/ARROW-5408 for why count != 2 + .null_count(0) + .add_child_data(expected_bool_data) + .add_child_data(expected_int_data) + .build(); + let struct_array = StructArray::from(struct_array_data); + assert!(a.equals(&struct_array)); + } + + #[test] + #[should_panic( + expected = "Array index out of bounds, cannot get item at index 6 from 5 entries" + )] + fn test_take_out_of_bounds() { + let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(6)]); + let take_opt = TakeOptions { check_bounds: true }; + + // int64 + test_take_primitive_arrays::<Int64Type>( + vec![Some(0), None, Some(2), Some(3), None], + &index, + Some(take_opt), + vec![None], + ); + } +}
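With the kernel complete, the `compute` re-export in the next hunk makes it reachable from user code. A minimal usage sketch; the import paths and constructors here are assumptions pieced together from this diff, not an authoritative API reference:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, UInt32Array};
use arrow::compute::kernels::take::{take, TakeOptions};
use arrow::error::Result;

fn main() -> Result<()> {
    let values: ArrayRef = Arc::new(Int32Array::from(vec![Some(10), None, Some(30)]));
    // Index 1 is null, so slot 1 of the output is null regardless of `values`.
    let indices = UInt32Array::from(vec![Some(2), None, Some(0)]);
    // With check_bounds enabled, an out-of-bounds index yields a ComputeError
    // instead of a panic.
    let taken = take(&values, &indices, Some(TakeOptions { check_bounds: true }))?;
    assert_eq!(taken.len(), 3); // values: [30, null, 10]
    Ok(())
}
```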
diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs index 7e31c52d85d..15af978af0a 100644 --- a/rust/arrow/src/compute/mod.rs +++ b/rust/arrow/src/compute/mod.rs @@ -27,4 +27,5 @@ pub use self::kernels::arithmetic::*; pub use self::kernels::boolean::*; pub use self::kernels::cast::*; pub use self::kernels::comparison::*; +pub use self::kernels::take::*; pub use self::kernels::temporal::*; diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 55726b85eda..dc1f54fdd2a 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -17,6 +17,7 @@ //! Common utilities for computation kernels. +use crate::array::*; use crate::bitmap::Bitmap; use crate::buffer::Buffer; use crate::error::Result; @@ -44,10 +45,57 @@ where } } +/// Takes/filters a list array's inner data using the offsets of the list array. +/// +/// Where a list array has indices `[0,2,5,10]`, taking indices of `[2,0]` returns +/// an array of the indices `[5..10, 0..2]` and offsets `[0,5,7]` (5 elements and 2 +/// elements) +pub(super) fn take_value_indices_from_list( + values: &ArrayRef, + indices: &UInt32Array, +) -> (UInt32Array, Vec<i32>) { + // TODO: benchmark this function, there might be a faster unsafe alternative + // get list array's offsets + let list: &ListArray = values.as_any().downcast_ref::<ListArray>().unwrap(); + let offsets: Vec<u32> = (0..=list.len()) + .map(|i| list.value_offset(i) as u32) + .collect(); + let mut new_offsets = Vec::with_capacity(indices.len()); + let mut values = Vec::new(); + let mut current_offset = 0; + // add first offset + new_offsets.push(0); + // compute the value indices, and set offsets accordingly + for i in 0..indices.len() { + if indices.is_valid(i) { + let ix = indices.value(i) as usize; + let start = offsets[ix]; + let end = offsets[ix + 1]; + current_offset += (end - start) as i32; + new_offsets.push(current_offset); + // if start == end, this slot is empty + if start != end { + // type annotation needed to guide compiler a bit + let mut offsets: Vec<Option<u32>> = + (start..end).map(|v| Some(v)).collect::<Vec<Option<u32>>>(); + values.append(&mut offsets); + } + } else { + new_offsets.push(current_offset); + } + } + (UInt32Array::from(values), new_offsets) +} + #[cfg(test)] mod tests { use super::*; + use std::sync::Arc; + + use crate::array::ArrayData; + use crate::datatypes::{DataType, ToByteSlice}; + #[test] fn test_apply_bin_op_to_option_bitmap() { assert_eq!( @@ -80,4 +128,30 @@ mod tests { ); } + #[test] + fn test_take_value_index_from_list() { + let value_data = Int32Array::from((0..10).collect::<Vec<i32>>()).data(); + let value_offsets = Buffer::from(&[0, 2, 5, 10].to_byte_slice()); + let list_data_type = DataType::List(Box::new(DataType::Int32)); + let list_data = ArrayData::builder(list_data_type.clone()) + .len(3) + .add_buffer(value_offsets.clone()) + .add_child_data(value_data.clone()) + .build(); + let array = Arc::new(ListArray::from(list_data)) as ArrayRef; + let index = UInt32Array::from(vec![2, 0]); + let (indexed, offsets) = take_value_indices_from_list(&array, &index); + assert_eq!(vec![0, 5, 7], offsets); + let data = UInt32Array::from(vec![ + Some(5), + Some(6), + Some(7), + Some(8), + Some(9), + Some(0), + Some(1), + ]) + .data(); + assert_eq!(data, indexed.data()); + } } diff --git a/rust/datafusion/Cargo.toml b/rust/datafusion/Cargo.toml index 8a311a5acba..6e021e32e30 100644 --- a/rust/datafusion/Cargo.toml +++ b/rust/datafusion/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion" description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" -version = "0.14.0" +version = "1.0.0-SNAPSHOT" homepage = "https://github.com/apache/arrow" repository = "https://github.com/apache/arrow" authors = ["Apache Arrow <dev@arrow.apache.org>"] @@ -36,23 +36,25 @@ path = "src/lib.rs" [[bin]] name = "datafusion-cli" -path = "src/bin/repl.rs" +path = "src/bin/main.rs" + +[features] +default = ["cli"] +cli = ["rustyline"] [dependencies] fnv = "1.0.3" -arrow = "0.14.0" -parquet = "0.14.0" -serde = { version = "1.0.80", features = ["rc"] } -serde_derive = "1.0.80" -serde_json = "1.0.33" +arrow = { path = "../arrow", version = "1.0.0-SNAPSHOT" } +parquet = { path = "../parquet", version = "1.0.0-SNAPSHOT" } sqlparser = "0.2.0" clap = "2.33.0" -rustyline = "4.1.0" prettytable-rs = "0.8.0" +rustyline = {version = "4.1.0", optional = true} [dev-dependencies] criterion = "0.2.0" + [[bench]] name = 
"aggregate_query_sql" harness = false diff --git a/rust/datafusion/README.md b/rust/datafusion/README.md index ad32b031e15..9c41128b6b1 100644 --- a/rust/datafusion/README.md +++ b/rust/datafusion/README.md @@ -29,7 +29,7 @@ Add this to your Cargo.toml: ```toml [dependencies] -datafusion = "0.14.0" +datafusion = "1.0.0-SNAPSHOT" ``` #### Use as a bin diff --git a/cpp/src/arrow/util/ubsan.cc b/rust/datafusion/src/bin/main.rs similarity index 81% rename from cpp/src/arrow/util/ubsan.cc rename to rust/datafusion/src/bin/main.rs index f3952f80e51..deb5b796b2d 100644 --- a/cpp/src/arrow/util/ubsan.cc +++ b/rust/datafusion/src/bin/main.rs @@ -14,15 +14,12 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -#include "arrow/util/ubsan.h" -namespace arrow { -namespace util { +// Only bring in dependencies for the repl when the cli feature is enabled. +#[cfg(feature = "cli")] +mod repl; -namespace internal { - -uint8_t non_null_filler = 0xFF; - -} // namespace internal -} // namespace util -} // namespace arrow +pub fn main() { + #[cfg(feature = "cli")] + repl::main() +} diff --git a/rust/datafusion/src/bin/repl.rs b/rust/datafusion/src/bin/repl.rs index 88a88943940..7ef042ed431 100644 --- a/rust/datafusion/src/bin/repl.rs +++ b/rust/datafusion/src/bin/repl.rs @@ -17,12 +17,9 @@ #![allow(bare_trait_objects)] -#[macro_use] -extern crate clap; - use arrow::array::*; use arrow::datatypes::{DataType, TimeUnit}; -use clap::{App, Arg}; +use clap::{crate_version, App, Arg}; use datafusion::error::{ExecutionError, Result}; use datafusion::execution::context::ExecutionContext; use datafusion::execution::relation::Relation; @@ -32,7 +29,7 @@ use std::cell::RefMut; use std::env; use std::path::Path; -fn main() { +pub fn main() { let matches = App::new("DataFusion") .version(crate_version!()) .about( diff --git a/rust/datafusion/src/execution/context.rs b/rust/datafusion/src/execution/context.rs index 1a31b9ab22a..b041f55d75a 100644 --- a/rust/datafusion/src/execution/context.rs +++ b/rust/datafusion/src/execution/context.rs @@ -66,6 +66,7 @@ impl ExecutionContext { /// of RecordBatch instances) pub fn sql(&mut self, sql: &str, batch_size: usize) -> Result>> { let plan = self.create_logical_plan(sql)?; + let plan = self.optimize(&plan)?; Ok(self.execute(&plan, batch_size)?) } @@ -86,7 +87,7 @@ impl ExecutionContext { // plan the query (create a logical relational plan) let plan = query_planner.sql_to_rel(&ansi)?; - Ok(self.optimize(&plan)?) 
+ Ok(plan) } DFASTNode::CreateExternalTable { name, @@ -172,7 +173,8 @@ impl ExecutionContext { Ok(Arc::new(TableImpl::new(Arc::new(LogicalPlan::TableScan { schema_name: "".to_string(), table_name: table_name.to_string(), - schema: provider.schema().clone(), + table_schema: provider.schema().clone(), + projected_schema: provider.schema().clone(), projection: None, })))) } diff --git a/rust/datafusion/src/execution/table_impl.rs b/rust/datafusion/src/execution/table_impl.rs index 680dec22844..1a92aacf4fe 100644 --- a/rust/datafusion/src/execution/table_impl.rs +++ b/rust/datafusion/src/execution/table_impl.rs @@ -19,7 +19,7 @@ use std::sync::Arc; -use crate::arrow::datatypes::{Field, Schema}; +use crate::arrow::datatypes::{DataType, Field, Schema}; use crate::error::{ExecutionError, Result}; use crate::logicalplan::Expr::Literal; use crate::logicalplan::ScalarValue; @@ -41,34 +41,64 @@ impl TableImpl { impl Table for TableImpl { /// Apply a projection based on a list of column names fn select_columns(&self, columns: Vec<&str>) -> Result> { - let schema = self.plan.schema(); - let mut projection_index: Vec = Vec::with_capacity(columns.len()); let mut expr: Vec = Vec::with_capacity(columns.len()); + for column_name in columns { + let i = self.column_index(column_name)?; + expr.push(Expr::Column(i)); + } + self.select(expr) + } + + /// Create a projection based on arbitrary expressions + fn select(&self, expr_list: Vec) -> Result> { + let schema = self.plan.schema(); + let mut field: Vec = Vec::with_capacity(expr_list.len()); - for column in columns { - match schema.column_with_name(column) { - Some((i, _)) => { - projection_index.push(i); - expr.push(Expr::Column(i)); + for expr in &expr_list { + match expr { + Expr::Column(i) => { + field.push(schema.field(*i).clone()); } - _ => { - return Err(ExecutionError::InvalidColumn(format!( - "No column named '{}'", - column - ))); + other => { + return Err(ExecutionError::NotImplemented(format!( + "Expr {:?} is not currently supported in this context", + other + ))) } } } Ok(Arc::new(TableImpl::new(Arc::new( LogicalPlan::Projection { - expr, + expr: expr_list.clone(), input: self.plan.clone(), - schema: projection(&schema, &projection_index)?, + schema: Arc::new(Schema::new(field)), }, )))) } + /// Create a selection based on a filter expression + fn filter(&self, expr: Expr) -> Result> { + Ok(Arc::new(TableImpl::new(Arc::new(LogicalPlan::Selection { + expr, + input: self.plan.clone(), + })))) + } + + /// Perform an aggregate query + fn aggregate( + &self, + group_expr: Vec, + aggr_expr: Vec, + ) -> Result> { + Ok(Arc::new(TableImpl::new(Arc::new(LogicalPlan::Aggregate { + input: self.plan.clone(), + group_expr, + aggr_expr, + schema: Arc::new(Schema::new(vec![])), + })))) + } + /// Limit the number of rows fn limit(&self, n: usize) -> Result> { Ok(Arc::new(TableImpl::new(Arc::new(LogicalPlan::Limit { @@ -78,24 +108,230 @@ impl Table for TableImpl { })))) } + /// Return an expression representing a column within this table + fn col(&self, name: &str) -> Result { + Ok(Expr::Column(self.column_index(name)?)) + } + + /// Return the index of a column within this table's schema + fn column_index(&self, name: &str) -> Result { + let schema = self.plan.schema(); + match schema.column_with_name(name) { + Some((i, _)) => Ok(i), + _ => Err(ExecutionError::InvalidColumn(format!( + "No column named '{}'", + name + ))), + } + } + + /// Create an expression to represent the min() aggregate function + fn min(&self, expr: &Expr) -> Result { + 
self.aggregate_expr("MIN", expr) + } + + /// Create an expression to represent the max() aggregate function + fn max(&self, expr: &Expr) -> Result { + self.aggregate_expr("MAX", expr) + } + + /// Create an expression to represent the sum() aggregate function + fn sum(&self, expr: &Expr) -> Result { + self.aggregate_expr("SUM", expr) + } + + /// Create an expression to represent the avg() aggregate function + fn avg(&self, expr: &Expr) -> Result { + self.aggregate_expr("AVG", expr) + } + + /// Create an expression to represent the count() aggregate function + fn count(&self, expr: &Expr) -> Result { + self.aggregate_expr("COUNT", expr) + } + /// Convert to logical plan fn to_logical_plan(&self) -> Arc { self.plan.clone() } } -/// Create a new schema by applying a projection to this schema's fields -fn projection(schema: &Schema, projection: &Vec) -> Result> { - let mut fields: Vec = Vec::with_capacity(projection.len()); - for i in projection { - if *i < schema.fields().len() { - fields.push(schema.field(*i).clone()); - } else { - return Err(ExecutionError::InvalidColumn(format!( - "Invalid column index {} in projection", - i - ))); +impl TableImpl { + /// Determine the data type for a given expression + fn get_data_type(&self, expr: &Expr) -> Result { + match expr { + Expr::Column(i) => Ok(self.plan.schema().field(*i).data_type().clone()), + _ => Err(ExecutionError::General(format!( + "Could not determine data type for expr {:?}", + expr + ))), + } + } + + /// Create an expression to represent a named aggregate function + fn aggregate_expr(&self, name: &str, expr: &Expr) -> Result { + let return_type = self.get_data_type(expr)?; + Ok(Expr::AggregateFunction { + name: name.to_string(), + args: vec![expr.clone()], + return_type, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::execution::context::ExecutionContext; + use std::env; + + #[test] + fn column_index() { + let t = test_table(); + assert_eq!(0, t.column_index("c1").unwrap()); + assert_eq!(1, t.column_index("c2").unwrap()); + assert_eq!(12, t.column_index("c13").unwrap()); + } + + #[test] + fn select_columns() -> Result<()> { + // build plan using Table API + let t = test_table(); + let t2 = t.select_columns(vec!["c1", "c2", "c11"])?; + let plan = t2.to_logical_plan(); + + // build query using SQL + let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100")?; + + // the two plans should be identical + assert_same_plan(&plan, &sql_plan); + + Ok(()) + } + + #[test] + fn select_expr() -> Result<()> { + // build plan using Table API + let t = test_table(); + let t2 = t.select(vec![t.col("c1")?, t.col("c2")?, t.col("c11")?])?; + let plan = t2.to_logical_plan(); + + // build query using SQL + let sql_plan = create_plan("SELECT c1, c2, c11 FROM aggregate_test_100")?; + + // the two plans should be identical + assert_same_plan(&plan, &sql_plan); + + Ok(()) + } + + #[test] + fn select_invalid_column() -> Result<()> { + let t = test_table(); + + match t.col("invalid_column_name") { + Ok(_) => panic!(), + Err(e) => assert_eq!( + "InvalidColumn(\"No column named \\\'invalid_column_name\\\'\")", + format!("{:?}", e) + ), } + + Ok(()) } - Ok(Arc::new(Schema::new(fields))) + + #[test] + fn aggregate() -> Result<()> { + // build plan using Table API + let t = test_table(); + let group_expr = vec![t.col("c1")?]; + let c12 = t.col("c12")?; + let aggr_expr = vec![ + t.min(&c12)?, + t.max(&c12)?, + t.avg(&c12)?, + t.sum(&c12)?, + t.count(&c12)?, + ]; + + let t2 = t.aggregate(group_expr.clone(), aggr_expr.clone())?; + + 
let plan = t2.to_logical_plan(); + + // build same plan using SQL API + let sql = "SELECT c1, MIN(c12), MAX(c12), AVG(c12), SUM(c12), COUNT(c12) \ + FROM aggregate_test_100 \ + GROUP BY c1"; + let sql_plan = create_plan(sql)?; + + // the two plans should be identical + assert_same_plan(&plan, &sql_plan); + + Ok(()) + } + + #[test] + fn limit() -> Result<()> { + // build query using Table API + let t = test_table(); + let t2 = t.select_columns(vec!["c1", "c2", "c11"])?.limit(10)?; + let plan = t2.to_logical_plan(); + + // build query using SQL + let sql_plan = + create_plan("SELECT c1, c2, c11 FROM aggregate_test_100 LIMIT 10")?; + + // the two plans should be identical + assert_same_plan(&plan, &sql_plan); + + Ok(()) + } + + /// Compare the formatted string representation of two plans for equality + fn assert_same_plan(plan1: &LogicalPlan, plan2: &LogicalPlan) { + assert_eq!(format!("{:?}", plan1), format!("{:?}", plan2)); + } + + /// Create a logical plan from a SQL query + fn create_plan(sql: &str) -> Result> { + let mut ctx = ExecutionContext::new(); + register_aggregate_csv(&mut ctx); + ctx.create_logical_plan(sql) + } + + fn test_table() -> Arc { + let mut ctx = ExecutionContext::new(); + register_aggregate_csv(&mut ctx); + ctx.table("aggregate_test_100").unwrap() + } + + fn register_aggregate_csv(ctx: &mut ExecutionContext) { + let schema = aggr_test_schema(); + let testdata = env::var("ARROW_TEST_DATA").expect("ARROW_TEST_DATA not defined"); + ctx.register_csv( + "aggregate_test_100", + &format!("{}/csv/aggregate_test_100.csv", testdata), + &schema, + true, + ); + } + + fn aggr_test_schema() -> Arc { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::UInt32, false), + Field::new("c3", DataType::Int8, false), + Field::new("c4", DataType::Int16, false), + Field::new("c5", DataType::Int32, false), + Field::new("c6", DataType::Int64, false), + Field::new("c7", DataType::UInt8, false), + Field::new("c8", DataType::UInt16, false), + Field::new("c9", DataType::UInt32, false), + Field::new("c10", DataType::UInt64, false), + Field::new("c11", DataType::Float32, false), + Field::new("c12", DataType::Float64, false), + Field::new("c13", DataType::Utf8, false), + ])) + } + } diff --git a/rust/datafusion/src/lib.rs b/rust/datafusion/src/lib.rs index 65ff3759f50..6e48162c6b5 100644 --- a/rust/datafusion/src/lib.rs +++ b/rust/datafusion/src/lib.rs @@ -22,9 +22,6 @@ #![allow(bare_trait_objects)] extern crate arrow; -#[macro_use] -extern crate serde_derive; -extern crate serde_json; extern crate sqlparser; pub mod datasource; diff --git a/rust/datafusion/src/logicalplan.rs b/rust/datafusion/src/logicalplan.rs index 8e690563233..cc2414909e9 100644 --- a/rust/datafusion/src/logicalplan.rs +++ b/rust/datafusion/src/logicalplan.rs @@ -27,7 +27,7 @@ use crate::optimizer::utils; use crate::sql::parser::FileType; /// Enumeration of supported function types (Scalar and Aggregate) -#[derive(Serialize, Deserialize, Debug, Clone)] +#[derive(Debug, Clone)] pub enum FunctionType { /// Simple function returning a value per DataFrame Scalar, @@ -82,7 +82,7 @@ impl FunctionMeta { } /// Operators applied to expressions -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Operator { /// Expressions are equal Eq, @@ -119,7 +119,7 @@ pub enum Operator { } /// ScalarValue enumeration -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum ScalarValue { /// null 
value Null, @@ -173,7 +173,7 @@ impl ScalarValue { } /// Relation expression -#[derive(Serialize, Deserialize, Clone, PartialEq)] +#[derive(Clone, PartialEq)] pub enum Expr { /// index into a value within the row or complex value Column(usize), @@ -379,7 +379,7 @@ impl fmt::Debug for Expr { /// The LogicalPlan represents different types of relations (such as Projection, /// Selection, etc) and can be created by the SQL query planner and the DataFrame API. -#[derive(Serialize, Deserialize, Clone)] +#[derive(Clone)] pub enum LogicalPlan { /// A Projection (essentially a SELECT with an expression list) Projection { @@ -423,8 +423,10 @@ pub enum LogicalPlan { schema_name: String, /// The name of the table table_name: String, - /// The schema description - schema: Arc, + /// The underlying table schema + table_schema: Arc, + /// The projected schema + projected_schema: Arc, /// Optional column indices to use as a projection projection: Option>, }, @@ -462,7 +464,9 @@ impl LogicalPlan { pub fn schema(&self) -> &Arc { match self { LogicalPlan::EmptyRelation { schema } => &schema, - LogicalPlan::TableScan { schema, .. } => &schema, + LogicalPlan::TableScan { + projected_schema, .. + } => &projected_schema, LogicalPlan::Projection { schema, .. } => &schema, LogicalPlan::Selection { input, .. } => input.schema(), LogicalPlan::Aggregate { schema, .. } => &schema, @@ -614,7 +618,6 @@ pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool { #[cfg(test)] mod tests { use super::*; - use serde_json; use std::thread; #[test] @@ -623,7 +626,8 @@ mod tests { let plan = Arc::new(LogicalPlan::TableScan { schema_name: "".to_string(), table_name: "people".to_string(), - schema: Arc::new(schema), + table_schema: Arc::new(schema.clone()), + projected_schema: Arc::new(schema), projection: Some(vec![0, 1, 4]), }); @@ -634,44 +638,4 @@ mod tests { }); } - #[test] - fn serialize_plan() { - let schema = Schema::new(vec![ - Field::new("first_name", DataType::Utf8, false), - Field::new("last_name", DataType::Utf8, false), - Field::new( - "address", - DataType::Struct(vec![ - Field::new("street", DataType::Utf8, false), - Field::new("zip", DataType::UInt16, false), - ]), - false, - ), - ]); - - let plan = LogicalPlan::TableScan { - schema_name: "".to_string(), - table_name: "people".to_string(), - schema: Arc::new(schema), - projection: Some(vec![0, 1, 4]), - }; - - let serialized = serde_json::to_string(&plan).unwrap(); - - assert_eq!( - "{\"TableScan\":{\ - \"schema_name\":\"\",\ - \"table_name\":\"people\",\ - \"schema\":{\"fields\":[\ - {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false},\ - {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false},\ - {\"name\":\"address\",\"data_type\":{\"Struct\":\ - [\ - {\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false},\ - {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false}]},\"nullable\":false}\ - ]},\ - \"projection\":[0,1,4]}}", - serialized - ); - } } diff --git a/rust/datafusion/src/optimizer/projection_push_down.rs b/rust/datafusion/src/optimizer/projection_push_down.rs index 9f9534b6d1e..6bc35da8200 100644 --- a/rust/datafusion/src/optimizer/projection_push_down.rs +++ b/rust/datafusion/src/optimizer/projection_push_down.rs @@ -139,9 +139,16 @@ impl ProjectionPushDown { LogicalPlan::TableScan { schema_name, table_name, - schema, + table_schema, + projection, .. 
} => { + if projection.is_some() { + return Err(ExecutionError::General( + "Cannot run projection push-down rule more than once".to_string(), + )); + } + // once we reach the table scan, we can use the accumulated set of column // indexes as the projection in the table scan let mut projection: Vec = Vec::with_capacity(accum.len()); @@ -153,8 +160,8 @@ impl ProjectionPushDown { // create the projected schema let mut projected_fields: Vec = Vec::with_capacity(projection.len()); - for i in 0..projection.len() { - projected_fields.push(schema.fields()[i].clone()); + for i in &projection { + projected_fields.push(table_schema.fields()[*i].clone()); } let projected_schema = Schema::new(projected_fields); @@ -169,7 +176,7 @@ impl ProjectionPushDown { )); } - for i in 0..schema.fields().len() { + for i in 0..table_schema.fields().len() { if let Some(n) = projection.iter().position(|v| *v == i) { mapping.insert(i, n); } @@ -179,7 +186,8 @@ impl ProjectionPushDown { Ok(Arc::new(LogicalPlan::TableScan { schema_name: schema_name.to_string(), table_name: table_name.to_string(), - schema: Arc::new(projected_schema), + table_schema: table_schema.clone(), + projected_schema: Arc::new(projected_schema), projection: Some(projection), })) } @@ -381,8 +389,11 @@ mod tests { // check that table scan schema now contains 2 columns match optimized_plan.as_ref().borrow() { LogicalPlan::Projection { input, .. } => match input.as_ref().borrow() { - LogicalPlan::TableScan { ref schema, .. } => { - assert_eq!(2, schema.fields().len()); + LogicalPlan::TableScan { + ref projected_schema, + .. + } => { + assert_eq!(2, projected_schema.fields().len()); } _ => assert!(false), }, @@ -403,14 +414,16 @@ mod tests { /// all tests share a common table fn test_table_scan() -> LogicalPlan { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::UInt32, false), + Field::new("b", DataType::UInt32, false), + Field::new("c", DataType::UInt32, false), + ])); TableScan { schema_name: "default".to_string(), table_name: "test".to_string(), - schema: Arc::new(Schema::new(vec![ - Field::new("a", DataType::UInt32, false), - Field::new("b", DataType::UInt32, false), - Field::new("c", DataType::UInt32, false), - ])), + table_schema: schema.clone(), + projected_schema: schema, projection: None, } } diff --git a/rust/datafusion/src/optimizer/type_coercion.rs b/rust/datafusion/src/optimizer/type_coercion.rs index 681a3cafcc2..ee21a174fb7 100644 --- a/rust/datafusion/src/optimizer/type_coercion.rs +++ b/rust/datafusion/src/optimizer/type_coercion.rs @@ -74,6 +74,7 @@ impl OptimizerRule for TypeCoercionRule { LogicalPlan::TableScan { .. } => Ok(Arc::new(plan.clone())), LogicalPlan::EmptyRelation { .. } => Ok(Arc::new(plan.clone())), LogicalPlan::Limit { .. } => Ok(Arc::new(plan.clone())), + LogicalPlan::CreateExternalTable { .. } => Ok(Arc::new(plan.clone())), other => Err(ExecutionError::NotImplemented(format!( "Type coercion optimizer rule does not support relation: {:?}", other diff --git a/rust/datafusion/src/sql/parser.rs b/rust/datafusion/src/sql/parser.rs index 74ae4cfca06..4b8bf0f4c0b 100644 --- a/rust/datafusion/src/sql/parser.rs +++ b/rust/datafusion/src/sql/parser.rs @@ -32,7 +32,7 @@ macro_rules! 
parser_err { } /// Types of files to parse as DataFrames -#[derive(Serialize, Deserialize, Debug, Clone)] +#[derive(Debug, Clone)] pub enum FileType { /// Newline-delimited JSON NdJson, @@ -162,8 +162,6 @@ impl DFParser { } } - //println!("Parsed {} column defs", columns.len()); - let mut headers = true; let file_type: FileType = if self .parser diff --git a/rust/datafusion/src/sql/planner.rs b/rust/datafusion/src/sql/planner.rs index 71ad507bf27..90cb3bd8d88 100644 --- a/rust/datafusion/src/sql/planner.rs +++ b/rust/datafusion/src/sql/planner.rs @@ -108,7 +108,6 @@ impl SqlToRel { .collect::>>()?, None => vec![], }; - //println!("GROUP BY: {:?}", group_expr); let mut all_fields: Vec = group_expr.clone(); aggr_expr.iter().for_each(|x| all_fields.push(x.clone())); @@ -175,8 +174,17 @@ impl SqlToRel { let limit_plan = match limit { &Some(ref limit_expr) => { let input_schema = order_by_plan.schema(); - let limit_rex = - self.sql_to_rex(&limit_expr, &input_schema.clone())?; + + let limit_rex = match self + .sql_to_rex(&limit_expr, &input_schema.clone())? + { + Expr::Literal(ScalarValue::Int64(n)) => { + Ok(Expr::Literal(ScalarValue::UInt32(n as u32))) + } + _ => Err(ExecutionError::General( + "Unexpected expression for LIMIT clause".to_string(), + )), + }?; LogicalPlan::Limit { expr: limit_rex, @@ -196,7 +204,8 @@ impl SqlToRel { Some(schema) => Ok(Arc::new(LogicalPlan::TableScan { schema_name: String::from("default"), table_name: id.clone(), - schema: schema.clone(), + table_schema: schema.clone(), + projected_schema: schema.clone(), projection: None, })), None => Err(ExecutionError::General(format!( diff --git a/rust/datafusion/src/table.rs b/rust/datafusion/src/table.rs index 51ac343aefb..b9ae895b8a3 100644 --- a/rust/datafusion/src/table.rs +++ b/rust/datafusion/src/table.rs @@ -19,7 +19,7 @@ //! 
and the DataFrame API in Apache Spark use crate::error::Result; -use crate::logicalplan::LogicalPlan; +use crate::logicalplan::{Expr, LogicalPlan}; use std::sync::Arc; /// Table is an abstraction of a logical query plan @@ -27,65 +27,43 @@ pub trait Table { /// Select columns by name fn select_columns(&self, columns: Vec<&str>) -> Result<Arc<Table>>; + /// Create a projection based on arbitrary expressions + fn select(&self, expr: Vec<Expr>) -> Result<Arc<Table>>; + + /// Create a selection based on a filter expression + fn filter(&self, expr: Expr) -> Result<Arc<Table>>; + + /// Perform an aggregate query + fn aggregate( + &self, + group_expr: Vec<Expr>, + aggr_expr: Vec<Expr>, + ) -> Result<Arc<Table>>; + /// limit the number of rows fn limit(&self, n: usize) -> Result<Arc<Table>>; /// Return the logical plan fn to_logical_plan(&self) -> Arc<LogicalPlan>; -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::execution::context::ExecutionContext; - use arrow::datatypes::*; - use std::env; - - #[test] - fn demonstrate_api_usage() { - let mut ctx = ExecutionContext::new(); - register_aggregate_csv(&mut ctx); - let t = ctx.table("aggregate_test_100").unwrap(); + /// Return an expression representing a column within this table + fn col(&self, name: &str) -> Result<Expr>; - let example = t - .select_columns(vec!["c1", "c2", "c11"]) - .unwrap() - .limit(10) - .unwrap(); + /// Create an expression to represent the min() aggregate function + fn min(&self, expr: &Expr) -> Result<Expr>; - let plan = example.to_logical_plan(); + /// Create an expression to represent the max() aggregate function + fn max(&self, expr: &Expr) -> Result<Expr>; - assert_eq!("Limit: UInt32(10)\n Projection: #0, #1, #10\n TableScan: aggregate_test_100 projection=None", format!("{:?}", plan)); + } + /// Create an expression to represent the sum() aggregate function + fn sum(&self, expr: &Expr) -> Result<Expr>; - fn register_aggregate_csv(ctx: &mut ExecutionContext) { - let schema = aggr_test_schema(); - let testdata = env::var("ARROW_TEST_DATA").expect("ARROW_TEST_DATA not defined"); - ctx.register_csv( - "aggregate_test_100", - &format!("{}/csv/aggregate_test_100.csv", testdata), - &schema, - true, - ); - } + /// Create an expression to represent the avg() aggregate function + fn avg(&self, expr: &Expr) -> Result<Expr>; - fn aggr_test_schema() -> Arc<Schema> { - Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::UInt32, false), - Field::new("c3", DataType::Int8, false), - Field::new("c4", DataType::Int16, false), - Field::new("c5", DataType::Int32, false), - Field::new("c6", DataType::Int64, false), - Field::new("c7", DataType::UInt8, false), - Field::new("c8", DataType::UInt16, false), - Field::new("c9", DataType::UInt32, false), - Field::new("c10", DataType::UInt64, false), - Field::new("c11", DataType::Float32, false), - Field::new("c12", DataType::Float64, false), - Field::new("c13", DataType::Utf8, false), - ])) - } + /// Create an expression to represent the count() aggregate function + fn count(&self, expr: &Expr) -> Result<Expr>; + /// Return the index of a column within this table's schema + fn column_index(&self, name: &str) -> Result<usize>; }
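The expanded trait reads naturally as a builder-style API. A hypothetical composition, assuming the module is exported as `datafusion::table` and the `Table` is obtained from an `ExecutionContext` as in the `table_impl.rs` tests above (nothing in this sketch is part of the change itself):

```rust
use std::sync::Arc;

use datafusion::error::Result;
use datafusion::table::Table;

// Group by c1 and compute MIN/MAX of c12, keeping ten result rows.
fn summarize(t: Arc<Table>) -> Result<Arc<Table>> {
    let c12 = t.col("c12")?;
    t.aggregate(vec![t.col("c1")?], vec![t.min(&c12)?, t.max(&c12)?])?
        .limit(10)
}
```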
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs index 34a3b0fe463..508e1f21cb2 100644 --- a/rust/datafusion/tests/sql.rs +++ b/rust/datafusion/tests/sql.rs @@ -28,11 +28,64 @@ use arrow::datatypes::{DataType, Field, Schema}; use datafusion::datasource::parquet::ParquetTable; use datafusion::datasource::TableProvider; +use datafusion::error::Result; use datafusion::execution::context::ExecutionContext; use datafusion::execution::relation::Relation; +use datafusion::logicalplan::LogicalPlan; const DEFAULT_BATCH_SIZE: usize = 1024 * 1024; +#[test] +fn nyc() -> Result<()> { + // schema for NYC taxi csv files + let schema = Schema::new(vec![ + Field::new("VendorID", DataType::Utf8, true), + Field::new("tpep_pickup_datetime", DataType::Utf8, true), + Field::new("tpep_dropoff_datetime", DataType::Utf8, true), + Field::new("passenger_count", DataType::Utf8, true), + Field::new("trip_distance", DataType::Float64, true), + Field::new("RatecodeID", DataType::Utf8, true), + Field::new("store_and_fwd_flag", DataType::Utf8, true), + Field::new("PULocationID", DataType::Utf8, true), + Field::new("DOLocationID", DataType::Utf8, true), + Field::new("payment_type", DataType::Utf8, true), + Field::new("fare_amount", DataType::Float64, true), + Field::new("extra", DataType::Float64, true), + Field::new("mta_tax", DataType::Float64, true), + Field::new("tip_amount", DataType::Float64, true), + Field::new("tolls_amount", DataType::Float64, true), + Field::new("improvement_surcharge", DataType::Float64, true), + Field::new("total_amount", DataType::Float64, true), + ]); + + let mut ctx = ExecutionContext::new(); + ctx.register_csv("tripdata", "file.csv", &schema, true); + + let logical_plan = ctx.create_logical_plan( + "SELECT passenger_count, MIN(fare_amount), MAX(fare_amount) \ + FROM tripdata GROUP BY passenger_count", + )?; + + let optimized_plan = ctx.optimize(&logical_plan)?; + + match optimized_plan.as_ref() { + LogicalPlan::Aggregate { input, .. } => match input.as_ref() { + LogicalPlan::TableScan { + ref projected_schema, + .. + } => { + assert_eq!(2, projected_schema.fields().len()); + assert_eq!(projected_schema.field(0).name(), "passenger_count"); + assert_eq!(projected_schema.field(1).name(), "fare_amount"); + } + _ => assert!(false), + }, + _ => assert!(false), + } + + Ok(()) +} + #[test] fn parquet_query() { let mut ctx = ExecutionContext::new(); @@ -106,6 +159,7 @@ fn csv_query_avg_multi_batch() { //TODO add ORDER BY once supported, to make this test deterministic let sql = "SELECT avg(c12) FROM aggregate_test_100"; let plan = ctx.create_logical_plan(&sql).unwrap(); + let plan = ctx.optimize(&plan).unwrap(); let results = ctx.execute(&plan, 4).unwrap(); let mut relation = results.borrow_mut(); let batch = relation.next().unwrap().unwrap(); @@ -363,6 +417,7 @@ fn load_parquet_table(name: &str) -> Rc { /// Execute query and return result set as tab delimited string fn execute(ctx: &mut ExecutionContext, sql: &str) -> String { let plan = ctx.create_logical_plan(&sql).unwrap(); + let plan = ctx.optimize(&plan).unwrap(); let results = ctx.execute(&plan, DEFAULT_BATCH_SIZE).unwrap(); result_str(&results) }
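These test updates mirror the `context.rs` change earlier in this diff: `sql()` now optimizes internally, while `create_logical_plan` returns the unoptimized plan, so the two-step path must call `optimize` explicitly. A hedged sketch of that flow (registration of `tripdata` elided; see the `nyc` test above):

```rust
use datafusion::error::Result;
use datafusion::execution::context::ExecutionContext;

fn run(ctx: &mut ExecutionContext) -> Result<()> {
    // create_logical_plan returns the *unoptimized* plan, so the explicit
    // optimize step is required on this path; ctx.sql() does both internally.
    let plan = ctx.create_logical_plan("SELECT passenger_count FROM tripdata")?;
    let plan = ctx.optimize(&plan)?;
    let _results = ctx.execute(&plan, 1024)?;
    Ok(())
}
```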
diff --git a/rust/parquet/Cargo.toml b/rust/parquet/Cargo.toml index 44bed001112..71852bae97a 100644 --- a/rust/parquet/Cargo.toml +++ b/rust/parquet/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "parquet" -version = "0.14.0" +version = "1.0.0-SNAPSHOT" license = "Apache-2.0" description = "Apache Parquet implementation in Rust" homepage = "https://github.com/apache/arrow" @@ -40,7 +40,7 @@ lz4 = "1.23" zstd = "0.4" chrono = "0.4" num-bigint = "0.2" -arrow = "0.14.0" +arrow = { path = "../arrow", version = "1.0.0-SNAPSHOT" } [dev-dependencies] lazy_static = "1" diff --git a/rust/parquet/README.md b/rust/parquet/README.md index 84cc2ccd5bc..2e7ee4cc600 100644 --- a/rust/parquet/README.md +++ b/rust/parquet/README.md @@ -23,7 +23,7 @@ Add this to your Cargo.toml: ```toml [dependencies] -parquet = "0.14.0" +parquet = "1.0.0-SNAPSHOT" ``` and this to your crate root: @@ -44,7 +44,7 @@ while let Some(record) = iter.next() { println!("{}", record); } ``` -See [crate documentation](https://docs.rs/crate/parquet/0.14.0) on available API. +See [crate documentation](https://docs.rs/crate/parquet/1.0.0-SNAPSHOT) on available API. ## Supported Parquet Version - Parquet-format 2.4.0 diff --git a/rust/parquet/src/schema/mod.rs b/rust/parquet/src/schema/mod.rs index 351ce973371..f689db3c1b8 100644 --- a/rust/parquet/src/schema/mod.rs +++ b/rust/parquet/src/schema/mod.rs @@ -64,3 +64,4 @@ pub mod parser; pub mod printer; pub mod types; +pub mod visitor; diff --git a/rust/parquet/src/schema/visitor.rs b/rust/parquet/src/schema/visitor.rs new file mode 100644 index 00000000000..6970f9ed47a --- /dev/null +++ b/rust/parquet/src/schema/visitor.rs @@ -0,0 +1,240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::basic::{LogicalType, Repetition}; +use crate::errors::ParquetError::General; +use crate::errors::Result; +use crate::schema::types::{Type, TypePtr}; + +/// A utility trait that helps users traverse a Parquet type. +pub trait TypeVisitor<R, C> { + /// Called when a primitive type is visited. + fn visit_primitive(&mut self, primitive_type: TypePtr, context: C) -> Result<R>; + + /// Default implementation when visiting a list. + /// + /// It checks list type definition and calls `visit_list_with_item` with extracted + /// item type. + /// + /// To fully understand this algorithm, please refer to + /// [parquet doc](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md). + fn visit_list(&mut self, list_type: TypePtr, context: C) -> Result<R> { + match list_type.as_ref() { + Type::PrimitiveType { .. } => panic!( + "{:?} is a list type and can't be processed as primitive.", + list_type + ), + Type::GroupType { + basic_info: _, + fields, + } if fields.len() == 1 => { + let list_item = fields.first().unwrap(); + + match list_item.as_ref() { + Type::PrimitiveType { .. } => { + if list_item.get_basic_info().repetition() == Repetition::REPEATED + { + self.visit_list_with_item( + list_type.clone(), + list_item, + context, + ) + } else { + Err(General( + "Primitive element type of list must be repeated." + .to_string(), + )) + } + } + Type::GroupType { + basic_info: _, + fields, + } => { + if fields.len() == 1 + && list_item.name() != "array" + && list_item.name() != format!("{}_tuple", list_type.name()) + { + self.visit_list_with_item( + list_type.clone(), + fields.first().unwrap(), + context, + ) + } else { + self.visit_list_with_item( + list_type.clone(), + list_item, + context, + ) + } + } + } + } + _ => Err(General( + "Group element type of list can only contain one field.".to_string(), + )), + } + } + + /// Called when a struct type is visited.
+ fn visit_struct(&mut self, struct_type: TypePtr, context: C) -> Result<R>; + + /// Called when a map type is visited. + fn visit_map(&mut self, map_type: TypePtr, context: C) -> Result<R>; + + /// A utility method which detects the input type and calls the corresponding method. + fn dispatch(&mut self, cur_type: TypePtr, context: C) -> Result<R> { + if cur_type.is_primitive() { + self.visit_primitive(cur_type, context) + } else { + match cur_type.get_basic_info().logical_type() { + LogicalType::LIST => self.visit_list(cur_type, context), + LogicalType::MAP | LogicalType::MAP_KEY_VALUE => { + self.visit_map(cur_type, context) + } + _ => self.visit_struct(cur_type, context), + } + } + } + + /// Called by `visit_list`. + fn visit_list_with_item( + &mut self, + list_type: TypePtr, + item_type: &Type, + context: C, + ) -> Result<R>; +} + +#[cfg(test)] +mod tests { + use super::TypeVisitor; + use crate::basic::Type as PhysicalType; + use crate::errors::Result; + use crate::schema::parser::parse_message_type; + use crate::schema::types::{Type, TypePtr}; + use std::rc::Rc; + + struct TestVisitorContext {} + struct TestVisitor { + primitive_visited: bool, + struct_visited: bool, + list_visited: bool, + root_type: TypePtr, + } + + impl TypeVisitor<bool, TestVisitorContext> for TestVisitor { + fn visit_primitive( + &mut self, + primitive_type: TypePtr, + _context: TestVisitorContext, + ) -> Result<bool> { + assert_eq!( + self.get_field_by_name(primitive_type.name()).as_ref(), + primitive_type.as_ref() + ); + self.primitive_visited = true; + Ok(true) + } + + fn visit_struct( + &mut self, + struct_type: TypePtr, + _context: TestVisitorContext, + ) -> Result<bool> { + assert_eq!( + self.get_field_by_name(struct_type.name()).as_ref(), + struct_type.as_ref() + ); + self.struct_visited = true; + Ok(true) + } + + fn visit_map( + &mut self, + _map_type: TypePtr, + _context: TestVisitorContext, + ) -> Result<bool> { + unimplemented!() + } + + fn visit_list_with_item( + &mut self, + list_type: TypePtr, + item_type: &Type, + _context: TestVisitorContext, + ) -> Result<bool> { + assert_eq!( + self.get_field_by_name(list_type.name()).as_ref(), + list_type.as_ref() + ); + assert_eq!("element", item_type.name()); + assert_eq!(PhysicalType::INT32, item_type.get_physical_type()); + self.list_visited = true; + Ok(true) + } + } + + impl TestVisitor { + fn new(root: TypePtr) -> Self { + Self { + primitive_visited: false, + struct_visited: false, + list_visited: false, + root_type: root, + } + } + + fn get_field_by_name(&self, name: &str) -> TypePtr { + self.root_type + .get_fields() + .iter() + .find(|t| t.name() == name) + .map(|t| t.clone()) + .unwrap() + } + } + + #[test] + fn test_visitor() { + let message_type = " + message spark_schema { + REQUIRED INT32 a; + OPTIONAL group inner_schema { + REQUIRED INT32 b; + REQUIRED DOUBLE c; + } + + OPTIONAL group e (LIST) { + REPEATED group list { + REQUIRED INT32 element; + } + } + } + "; + + let parquet_type = Rc::new(parse_message_type(&message_type).unwrap()); + + let mut visitor = TestVisitor::new(parquet_type.clone()); + for f in parquet_type.get_fields() { + let c = TestVisitorContext {}; + assert!(visitor.dispatch(f.clone(), c).unwrap()); + } + + assert!(visitor.struct_visited); + assert!(visitor.primitive_visited); + assert!(visitor.list_visited); + } +}
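To make the traversal contract concrete, here is a hypothetical visitor built on the trait above: it counts leaf columns, threading the running total through the context parameter. The struct and its logic are illustrative only:

```rust
use parquet::errors::Result;
use parquet::schema::types::{Type, TypePtr};
use parquet::schema::visitor::TypeVisitor;

/// Hypothetical visitor counting leaf columns of a schema.
struct LeafCounter;

impl TypeVisitor<usize, usize> for LeafCounter {
    fn visit_primitive(&mut self, _t: TypePtr, count: usize) -> Result<usize> {
        Ok(count + 1)
    }

    fn visit_struct(&mut self, struct_type: TypePtr, count: usize) -> Result<usize> {
        // Recurse into each child, accumulating through the context value.
        struct_type
            .get_fields()
            .iter()
            .try_fold(count, |acc, f| self.dispatch(f.clone(), acc))
    }

    fn visit_map(&mut self, _t: TypePtr, count: usize) -> Result<usize> {
        Ok(count) // maps elided in this sketch
    }

    fn visit_list_with_item(
        &mut self,
        _list_type: TypePtr,
        _item_type: &Type,
        count: usize,
    ) -> Result<usize> {
        Ok(count + 1) // a list's item contributes one leaf here
    }
}
```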
diff --git a/rust/rustfmt.toml b/rust/rustfmt.toml
index b692119bbc1..418b9e2acbb 100644
--- a/rust/rustfmt.toml
+++ b/rust/rustfmt.toml
@@ -15,7 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.
 
-max_width = 90
-wrap_comments = true
-format_doc_comments = true
-comment_width = 90
\ No newline at end of file
+max_width = 90
\ No newline at end of file
diff --git a/site/README.md b/site/README.md
index 73fd185ee12..33758e9e3f7 100644
--- a/site/README.md
+++ b/site/README.md
@@ -36,6 +36,13 @@ gem install jekyll bundler
 bundle install
 ```
 
+On some platforms, the Ruby `nokogiri` library may fail to build; in
+such cases, the following configuration option may help:
+
+```
+bundle config build.nokogiri --use-system-libraries
+```
+
 If you are planning to publish the website, you must clone the arrow-site
 git repository. Run this command from the `site` directory so that
 `asf-site` is a subdirectory of `site`.
diff --git a/site/_data/contributors.yml b/site/_data/contributors.yml
index 95b18b2f094..185a565abf6 100644
--- a/site/_data/contributors.yml
+++ b/site/_data/contributors.yml
@@ -16,10 +16,13 @@
 # Database of contributors to Apache Arrow (WIP)
 # Blogs and other pages use this data
 #
+- name: Apache Arrow Community
+  githubId: apache
+  homepage: https://arrow.apache.org
 - name: Wes McKinney
   apacheId: wesm
   githubId: wesm
-  homepage: http://wesmckinney.com
+  homepage: https://wesmckinney.com
   role: PMC
 - name: Uwe Korn
   apacheId: uwe
diff --git a/site/_data/versions.yml b/site/_data/versions.yml
index 7fb1fef2084..d42541c482c 100644
--- a/site/_data/versions.yml
+++ b/site/_data/versions.yml
@@ -16,16 +16,16 @@
 # Database of the current verion
 #
 current:
-  number: '0.13.0'
-  pinned_number: '0.13.*'
-  date: '1 April 2019'
-  git-tag: 'dfb9e7af3cd92722893a3819b6676dfdef08f896'
-  github-tag-link: 'https://github.com/apache/arrow/releases/tag/apache-arrow-0.13.0'
-  release-notes: 'https://arrow.apache.org/release/0.13.0.html'
-  mirrors: 'https://www.apache.org/dyn/closer.cgi/arrow/arrow-0.13.0/'
-  tarball_name: 'apache-arrow-0.13.0.tar.gz'
-  mirrors-tar: 'https://www.apache.org/dyn/closer.cgi/arrow/arrow-0.13.0/apache-arrow-0.13.0.tar.gz'
-  java-artifacts: 'http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%220.13.0%22'
-  asc: 'https://www.apache.org/dist/arrow/arrow-0.13.0/apache-arrow-0.13.0.tar.gz.asc'
-  sha256: 'https://www.apache.org/dist/arrow/arrow-0.13.0/apache-arrow-0.13.0.tar.gz.sha256'
-  sha512: 'https://www.apache.org/dist/arrow/arrow-0.13.0/apache-arrow-0.13.0.tar.gz.sha512'
+  number: '0.14.0'
+  pinned_number: '0.14.*'
+  date: '4 July 2019'
+  git-tag: 'a591d76ad9a657110368aa422bb00f4010cb6b6e'
+  github-tag-link: 'https://github.com/apache/arrow/releases/tag/apache-arrow-0.14.0'
+  release-notes: 'https://arrow.apache.org/release/0.14.0.html'
+  mirrors: 'https://www.apache.org/dyn/closer.cgi/arrow/arrow-0.14.0/'
+  tarball_name: 'apache-arrow-0.14.0.tar.gz'
+  mirrors-tar: 'https://www.apache.org/dyn/closer.cgi/arrow/arrow-0.14.0/apache-arrow-0.14.0.tar.gz'
+  java-artifacts: 'http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.arrow%22%20AND%20v%3A%220.14.0%22'
+  asc: 'https://www.apache.org/dist/arrow/arrow-0.14.0/apache-arrow-0.14.0.tar.gz.asc'
+  sha256: 'https://www.apache.org/dist/arrow/arrow-0.14.0/apache-arrow-0.14.0.tar.gz.sha256'
+  sha512: 'https://www.apache.org/dist/arrow/arrow-0.14.0/apache-arrow-0.14.0.tar.gz.sha512'
diff --git a/site/_posts/2019-07-08-0.14.0-release.md b/site/_posts/2019-07-08-0.14.0-release.md
new file mode 100644
index 00000000000..1c1f1c8a62d
--- /dev/null
+++ b/site/_posts/2019-07-08-0.14.0-release.md
@@ -0,0 +1,300 @@
+---
+layout: post
+title: "Apache Arrow 0.14.0 Release"
+date: "2019-07-02 00:00:00 -0600"
-0600" +author: apache +categories: [release] +--- + + +The Apache Arrow team is pleased to announce the 0.14.0 release. This +covers 3 months of development work and includes [**602 resolved +issues**][1] from [**75 distinct contributors**][2]. See the Install +Page to learn how to get the libraries for your platform. The +[complete changelog][3] is also available. + +This post will give some brief highlights in the project since the +0.13.0 release from April. + +## New committers + +Since the 0.13.0 release, the following have been added: + +* [Neville Dipale][5] was added as a committer +* [François Saint-Jacques][6] was added as a committer +* [Praveen Kumar][7] was added as a committer + +Thank you for all your contributions! + +## Upcoming 1.0.0 Format Stability Release + +We are planning for our next major release to move from 0.14.0 to +1.0.0. The major version number will indicate stability of the Arrow +columnar format and binary protocol. While the format has already been +stable since December 2017, we believe it is a good idea to make this +stability official and to indicate that it is safe to persist +serialized Arrow data in applications. This means that applications +will be able to safely upgrade to new Arrow versions without having to +worry about backwards incompatibilities. We will write in a future +blog post about the stability guarantees we intend to provide to help +application developers plan accordingly. + +## Packaging + +We added support for the following platforms: + +* Debian GNU/Linux buster +* Ubuntu 19.04 + +We dropped support for Ubuntu 14.04. + +## Development Infrastructure and Tooling + +As the project has grown larger and more diverse, we are increasingly +outgrowing what we can test in public continuous integration services +like Travis CI and Appveyor. In addition, we share these resources +with the entire Apache Software Foundation, and given the high volume +of pull requests into Apache Arrow, maintainers are frequently waiting +many hours for the green light to merge patches. + +The complexity of our testing is driven by the number of different +components and programming languages as well as increasingly long +compilation and test execution times as individual libraries grow +larger. The 50 minute time limit of public CI services is simply too +limited to comprehensively test the project. Additionally, the CI host +machines are constrained in their features and memory limits, +preventing us from testing features that are only relevant on large +amounts of data (10GB or more) or functionality that requires a +CUDA-enabled GPU. + +Organizations that contribute to Apache Arrow are working on physical +build infrastructure and tools to improve build times and build +scalability. One such new tool is `ursabot`, a GitHub-enabled bot +that can be used to trigger builds either on physical build or in the +cloud. It can also be used to trigger benchmark timing comparisons. If +you are contributing to the project, you may see Ursabot being +employed to trigger tests in pull requests. + +To help assist with migrating away from Travis CI, we are also working +to make as many of our builds reproducible with Docker and not reliant +on Travis CI-specific configuration details. This will also help +contributors reproduce build failures locally without having to wait +for Travis CI. 
+
+## Columnar Format Notes
+
+* User-defined "extension" types have been formalized in the Arrow
+  format, enabling library users to embed custom data types in the
+  Arrow columnar format. Initial support is available in C++, Java,
+  and Python.
+* A new Duration logical type was added to represent absolute lengths
+  of time.
+
+## Arrow Flight notes
+
+Flight now supports many of the features of a complete RPC
+framework.
+
+* Authentication APIs are now supported across all languages (ARROW-5137)
+* Encrypted communication using OpenSSL is supported (ARROW-5643,
+  ARROW-5529)
+* Clients can specify timeouts on remote calls (ARROW-5136)
+* On the protocol level, endpoints are now identified with URIs, to
+  support an open-ended number of potential transports (including TLS
+  and Unix sockets, and perhaps even non-gRPC-based transports in the
+  future) (ARROW-4651)
+* Application-defined metadata can be sent alongside data (ARROW-4626,
+  ARROW-4627).
+
+Windows is now a supported platform for Flight in C++ and Python
+(ARROW-3294), and Flight-enabled Python wheels are now shipped for all
+supported platforms (ARROW-3150, ARROW-5656). C++, Python, and Java have
+been brought to parity, now that actions can return streaming results
+in Java (ARROW-5254).
+
+## C++ notes
+
+There were 188 resolved issues related to the C++ implementation, so we
+only summarize some of the work here.
+
+### General platform improvements
+
+* A FileSystem abstraction (ARROW-767) has been added, which paves the
+  way for a future Arrow Datasets library allowing access to sharded
+  data on arbitrary storage systems, including remote or cloud
+  storage. A first draft of the Datasets API was committed in
+  ARROW-5512. Right now, this comes with no implementation, but we
+  expect to slowly build it up in the coming weeks or months. Early
+  feedback is welcome on this API.
+* The dictionary API has been reworked in ARROW-3144. The dictionary
+  values used to be tied to the DictionaryType instance, which ended
+  up being too inflexible. Since dictionary-encoding is more often an
+  optimization than a semantic property of the data, we decided to
+  move the dictionary values to the ArrayData structure, making it
+  natural for dictionary-encoded arrays to share the same DataType
+  instance, regardless of the encoding details.
+* The FixedSizeList and Map types have been implemented, including in
+  integration tests. The Map type is akin to a List of Struct(key,
+  value) entries, but making it explicit that the underlying data has
+  key-value mapping semantics. Also, map entries are always non-null.
+* A `Result<T>` class has been introduced in ARROW-4800. The aim is to
+  allow returning either an error or a function's logical result
+  without resorting to pointer-out arguments.
+* The Parquet C++ library has been refactored to use common Arrow IO
+  classes for improved C++ platform interoperability.
+
+### Line-delimited JSON reader
+
+A multithreaded line-delimited JSON reader (powered internally by
+RapidJSON) is now available for use (also in Python and R via
+bindings). This will likely be expanded to support more kinds of JSON
+storage in the future.
+
+### New computational kernels
+
+A number of new computational kernels have been developed:
+
+* Compare filter for logical comparisons yielding boolean arrays
+* Filter kernel for selecting elements of an input array according to
+  a boolean selection array.
+* Take kernel, which selects elements by integer index, has been
+  expanded to support nested types
+
+## C# Notes
+
+The native C# implementation has continued to mature since 0.13. This
+release includes a number of performance, memory use, and usability
+improvements.
+
+## Go notes
+
+Go's support for the Arrow columnar format continues to expand. Go now
+supports reading and writing the Arrow columnar binary protocol, and
+it has also been **added to the cross-language integration
+tests**. There are now four languages (C++, Go, Java, and JavaScript)
+included in our integration tests to verify cross-language
+interoperability.
+
+## Java notes
+
+* Support for referencing arbitrary memory using `ArrowBuf` has been
+  implemented, paving the way for memory map support in Java
+* A number of performance improvements around vector value access were
+  added (see ARROW-5264, ARROW-5290).
+* The Map type has been implemented in Java and integration tested
+  with C++
+* Several microbenchmarks have been added and improved, including a
+  significant speed-up of zeroing out buffers.
+* A new algorithms package has been started to contain reference
+  implementations of common algorithms. The initial contribution is
+  for Array/Vector sorting.
+
+## JavaScript Notes
+
+A new incremental [array builder API][4] is available.
+
+## MATLAB Notes
+
+Version 0.14.0 features improved Feather file support in the MEX bindings.
+
+## Python notes
+
+* We fixed a problem that caused the Python wheels to be much larger
+  in 0.13.0 than they were in 0.12.0. Since the introduction of LLVM
+  into our build toolchain, however, the wheels are still going to be
+  significantly bigger. We are interested in approaches to enable
+  pyarrow to be installed in pieces with pip or conda rather than
+  monolithically.
+* It is now possible to define ExtensionTypes with a Python
+  implementation (ARROW-840). Those ExtensionTypes can survive a
+  roundtrip through C++ and serialization.
+* The Flight improvements highlighted above (see C++ notes) are all
+  available from Python. Furthermore, Flight is now bundled in our
+  binary wheels and conda packages for Linux, Windows and macOS
+  (ARROW-3150, ARROW-5656).
+* We will build "manylinux2010" binary wheels for Linux systems, in
+  addition to "manylinux1" wheels (ARROW-2461). Manylinux2010 is a
+  newer standard for more recent systems, with less restrictive
+  toolchain constraints. Installing manylinux2010 wheels requires an
+  up-to-date version of pip.
+* Various bug fixes for CSV reading in Python and C++, including the
+  ability to parse Decimal(x, y) columns.
+
+### Parquet improvements
+
+* Column statistics for logical types like unicode strings, unsigned
+  integers, and timestamps are now cast to compatible Python types (see
+  ARROW-4139)
+* It's now possible to configure "data page" sizes when writing a file
+  from Python
+
+## Ruby and C GLib notes
+
+The GLib and Ruby bindings have been tracking features in the C++
+project. This release includes bindings for Gandiva, the JSON reader, and
+other C++ features.
+
+## Rust notes
+
+There is ongoing work in Rust happening on Parquet file support,
+computational kernels, and the DataFusion query engine. See the full
+changelog for details; a small example follows these language notes.
+
+## R notes
+
+We have been working on build and packaging for R so that community
+members can hopefully release the package to CRAN in the near
+future. Feature development for R has continued to follow the upstream
+C++ project.
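+As a small taste of the Rust Parquet work mentioned above, the sketch
+below reads a Parquet file's schema and prints it in message-type
+syntax. It is illustrative only: the file path is a placeholder, and
+the exact reader and printer signatures may differ slightly between
+releases.
+
+```rust
+use std::fs::File;
+use std::path::Path;
+
+use parquet::file::reader::{FileReader, SerializedFileReader};
+use parquet::schema::printer;
+
+fn main() {
+    // Open a Parquet file and wrap it in the on-disk reader.
+    let file = File::open(&Path::new("data.parquet")).unwrap();
+    let reader = SerializedFileReader::new(file).unwrap();
+
+    // Bind the metadata first so the borrowed schema outlives it.
+    let metadata = reader.metadata();
+    let file_metadata = metadata.file_metadata();
+
+    // Print the root schema in Parquet's `message { ... }` syntax.
+    printer::print_schema(&mut std::io::stdout(), file_metadata.schema());
+}
+```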
+
+## Community Discussions Ongoing
+
+There are a number of active discussions ongoing on the
+dev@arrow.apache.org developer mailing list. We look forward to hearing
+from the community there:
+
+* [Timing and scope of 1.0.0 release][15]
+* [Solutions to increase continuous integration capacity][13]
+* [A proposal for versioning and forward/backward compatibility
+  guarantees for the 1.0.0 release][8] was shared, though not much
+  discussion has occurred yet.
+* [Addressing possible unaligned access and undefined behavior concerns][9]
+  in the Arrow binary protocol
+* [Supporting smaller than 128-bit encoding of fixed width decimals][10]
+* [Forking the Avro C++ implementation][11] so as to adapt it to Arrow's
+  needs
+* [Sparse representation and compression in Arrow][12]
+* [Flight extensions: middleware API and generalized Put operations][14]
+
+[1]: https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20%3D%20Resolved%20AND%20fixVersion%20%3D%200.14.0
+[2]: https://arrow.apache.org/release/0.14.0.html#contributors
+[3]: https://arrow.apache.org/release/0.14.0.html
+[4]: https://github.com/apache/arrow/tree/master/js/src/builder
+[5]: https://github.com/nevi-me
+[6]: https://github.com/fsaintjacques
+[7]: https://github.com/praveenbingo
+[8]: https://lists.apache.org/thread.html/5715a4d402c835d22d929a8069c5c0cf232077a660ee98639d544af8@%3Cdev.arrow.apache.org%3E
+[9]: https://lists.apache.org/thread.html/8440be572c49b7b2ffb76b63e6d935ada9efd9c1c2021369b6d27786@%3Cdev.arrow.apache.org%3E
+[10]: https://lists.apache.org/thread.html/31b00086c2991104bd71fb1a2173f32b4a2f569d8e7b5b41e836f3a3@%3Cdev.arrow.apache.org%3E
+[11]: https://lists.apache.org/thread.html/97d78112ab583eecb155a7d78342c1063df65d64ec3ccfa0b18737c3@%3Cdev.arrow.apache.org%3E
+[12]: https://lists.apache.org/thread.html/a99124e57c14c3c9ef9d98f3c80cfe1dd25496bf3ff7046778add937@%3Cdev.arrow.apache.org%3E
+[13]: https://lists.apache.org/thread.html/96b2e22606e8a7b0ad7dc4aae16f232724d1059b34636676ed971d40@%3Cdev.arrow.apache.org%3E
+[14]: https://lists.apache.org/thread.html/82a7c026ad18dbe9fdbcffa3560979aff6fd86dd56a49f40d9cfb46e@%3Cdev.arrow.apache.org%3E
+[15]: https://lists.apache.org/thread.html/44a7a3d256ab5dbd62da6fe45b56951b435697426bf4adedb6520907@%3Cdev.arrow.apache.org%3E
\ No newline at end of file
diff --git a/site/_release/0.14.0.md b/site/_release/0.14.0.md
new file mode 100644
index 00000000000..ed191d9d355
--- /dev/null
+++ b/site/_release/0.14.0.md
@@ -0,0 +1,760 @@
+---
+layout: default
+title: Apache Arrow 0.14.0 Release
+permalink: /release/0.14.0.html
+---
+
+
+# Apache Arrow 0.14.0 (4 July 2019)
+
+This is a major release covering more than 3 months of development.
+
+## Download
+
+* [**Source Artifacts**][1]
+* **Binary Artifacts**
+  * [For CentOS][2]
+  * [For Debian][3]
+  * [For Python][4]
+  * [For Ubuntu][5]
+* [Git tag][6]
+
+## Contributors
+
+This release includes 570 commits from 78 distinct contributors.
+ +```console +$ git shortlog -sn apache-arrow-0.13.0..apache-arrow-0.14.0 + 67 Antoine Pitrou + 62 Wes McKinney + 37 Sebastien Binet + 34 Sutou Kouhei + 25 Kouhei Sutou + 24 Neal Richardson + 22 Romain Francois + 21 Joris Van den Bossche + 20 Benjamin Kietzman + 19 Micah Kornfield + 18 Krisztián Szűcs + 16 David Li + 16 François Saint-Jacques + 14 Yosuke Shiro + 11 Pindikura Ravindra + 11 Praveen + 10 liyafan82 + 10 tianchen + 7 Kenta Murata + 7 Neville Dipale + 7 Renjie Liu + 6 Chao Sun + 6 Deepak Majeti + 6 Hatem Helal + 5 Andy Grove + 4 Brian Hulette + 4 Bryan Cutler + 4 Eric Erhardt + 4 Zhuo Peng + 4 ptaylor + 4 shengjun.li + 3 Marco Neumann + 3 Philipp Moritz + 3 Prudhvi Porandla + 3 Renat Valiullin + 3 TP Boudreau + 3 Zhiyuan Zheng + 2 Fabio B. Silva + 2 HyukjinKwon + 2 John Muehlhausen + 2 Prashanth Govindarajan + 2 alexandreyc + 2 shyam + 1 Alex Sergeev + 1 Anatoly Myachev + 1 Anson Qian + 1 Frank Wessels + 1 Gidon Gershinsky + 1 Guillaume Horel + 1 Ivan Sadikov + 1 Jamie Blondin + 1 Javier Luraschi + 1 Jim Walker + 1 Johannes Luong + 1 Jonathan A. Sternberg + 1 Kevin Gurney + 1 MASAMIKI + 1 Mark Cafaro + 1 Masayuki Takahashi + 1 Miguel Cabrera + 1 Nuno Sucena Almeida + 1 Pearu Peterson + 1 Robin Kåveland Hansen + 1 Rok + 1 Ryan Murray + 1 Takuya Kato + 1 Yngve Sekse Kristiansen + 1 Yuqi Gu + 1 Yurui Zhou + 1 Zhijun Fu + 1 daslu + 1 dlekkas + 1 emkornfield + 1 praveenbingo + 1 rjzamora + 1 siddharth + 1 tianchen92 + 1 tiger +``` + +## Patch Committers + +The following Apache committers merged contributed patches to the repository. + +```console +$ git shortlog -csn apache-arrow-0.13.0..apache-arrow-0.14.0 + 213 Wes McKinney + 67 Antoine Pitrou + 40 Sutou Kouhei + 36 Krisztián Szűcs + 36 Sebastien Binet + 34 Micah Kornfield + 33 Kouhei Sutou + 22 Pindikura Ravindra + 19 Romain Francois + 18 Chao Sun + 16 François Saint-Jacques + 10 Uwe L. 
Korn + 6 Bryan Cutler + 4 Brian Hulette + 4 Yosuke Shiro + 3 GitHub + 2 Andy Grove + 2 Philipp Moritz + 2 Sidd + 1 Deepak Majeti + 1 Robert Nishihara + 1 ptaylor +``` + +## Changelog + +### New Features and Improvements + +* [ARROW-1012](https://issues.apache.org/jira/browse/ARROW-1012) - [C++] Create a configurable implementation of RecordBatchReader that reads from Apache Parquet files +* [ARROW-1207](https://issues.apache.org/jira/browse/ARROW-1207) - [C++] Implement Map logical type +* [ARROW-1261](https://issues.apache.org/jira/browse/ARROW-1261) - [Java] Add container type for Map logical type +* [ARROW-1278](https://issues.apache.org/jira/browse/ARROW-1278) - Integration tests for Fixed Size List type +* [ARROW-1279](https://issues.apache.org/jira/browse/ARROW-1279) - [Integration][Java] Integration tests for Map type +* [ARROW-1280](https://issues.apache.org/jira/browse/ARROW-1280) - [C++] Implement Fixed Size List type +* [ARROW-1496](https://issues.apache.org/jira/browse/ARROW-1496) - [JS] Upload coverage data to codecov.io +* [ARROW-1558](https://issues.apache.org/jira/browse/ARROW-1558) - [C++] Implement boolean selection kernels +* [ARROW-1774](https://issues.apache.org/jira/browse/ARROW-1774) - [C++] Add "view" function to create zero-copy views for compatible types, if supported +* [ARROW-1957](https://issues.apache.org/jira/browse/ARROW-1957) - [Python] Write nanosecond timestamps using new NANO LogicalType Parquet unit +* [ARROW-1983](https://issues.apache.org/jira/browse/ARROW-1983) - [Python] Add ability to write parquet \`\_metadata\` file +* [ARROW-2057](https://issues.apache.org/jira/browse/ARROW-2057) - [Python] Configure size of data pages in pyarrow.parquet.write\_table +* [ARROW-2102](https://issues.apache.org/jira/browse/ARROW-2102) - [C++] Implement take kernel functions - primitive value type +* [ARROW-2103](https://issues.apache.org/jira/browse/ARROW-2103) - [C++] Implement take kernel functions - string/binary value type +* [ARROW-2104](https://issues.apache.org/jira/browse/ARROW-2104) - [C++] Implement take kernel functions - nested array value type +* [ARROW-2105](https://issues.apache.org/jira/browse/ARROW-2105) - [C++] Implement take kernel functions - properly handle special indices +* [ARROW-2217](https://issues.apache.org/jira/browse/ARROW-2217) - [C++] Add option to use dynamic linking for compression library dependencies +* [ARROW-2298](https://issues.apache.org/jira/browse/ARROW-2298) - [Python] Add option to not consider NaN to be null when converting to an integer Arrow type +* [ARROW-2412](https://issues.apache.org/jira/browse/ARROW-2412) - [Integration] Add nested dictionary integration test +* [ARROW-2467](https://issues.apache.org/jira/browse/ARROW-2467) - [Rust] Generate code using Flatbuffers +* [ARROW-2517](https://issues.apache.org/jira/browse/ARROW-2517) - [Java] Add list writer +* [ARROW-2707](https://issues.apache.org/jira/browse/ARROW-2707) - [C++] Implement Table::Slice methods using Column::Slice +* [ARROW-2796](https://issues.apache.org/jira/browse/ARROW-2796) - [C++] Simplify symbols.map file, use when building libarrow\_python +* [ARROW-2818](https://issues.apache.org/jira/browse/ARROW-2818) - [Python] Better error message when passing SparseDataFrame into Table.from\_pandas +* [ARROW-2835](https://issues.apache.org/jira/browse/ARROW-2835) - [C++] ReadAt/WriteAt are inconsistent with moving the files position +* [ARROW-2969](https://issues.apache.org/jira/browse/ARROW-2969) - [R] Convert between StructArray and "nested" data.frame 
column containing data frame in each cell +* [ARROW-2981](https://issues.apache.org/jira/browse/ARROW-2981) - [C++] Support scripts / documentation for running clang-tidy on codebase +* [ARROW-3040](https://issues.apache.org/jira/browse/ARROW-3040) - [Go] add support for comparing Arrays +* [ARROW-3041](https://issues.apache.org/jira/browse/ARROW-3041) - [Go] add support for TimeArray +* [ARROW-3052](https://issues.apache.org/jira/browse/ARROW-3052) - [C++] Detect ORC system packages +* [ARROW-3087](https://issues.apache.org/jira/browse/ARROW-3087) - [C++] Add kernels for comparison operations to scalars +* [ARROW-3144](https://issues.apache.org/jira/browse/ARROW-3144) - [C++] Move "dictionary" member from DictionaryType to ArrayData to allow for changing dictionaries between Array chunks +* [ARROW-3150](https://issues.apache.org/jira/browse/ARROW-3150) - [Python] Ship Flight-enabled Python wheels on Linux and Windows +* [ARROW-3166](https://issues.apache.org/jira/browse/ARROW-3166) - [C++] Consolidate IO interfaces used in arrow/io and parquet-cpp +* [ARROW-3191](https://issues.apache.org/jira/browse/ARROW-3191) - [Java] Add support for ArrowBuf to point to arbitrary memory. +* [ARROW-3200](https://issues.apache.org/jira/browse/ARROW-3200) - [C++] Add support for reading Flight streams with dictionaries +* [ARROW-3290](https://issues.apache.org/jira/browse/ARROW-3290) - [C++] Toolchain support for secure gRPC +* [ARROW-3294](https://issues.apache.org/jira/browse/ARROW-3294) - [C++] Test Flight RPC on Windows / Appveyor +* [ARROW-3314](https://issues.apache.org/jira/browse/ARROW-3314) - [R] Set -rpath using pkg-config when building +* [ARROW-3419](https://issues.apache.org/jira/browse/ARROW-3419) - [C++] Run include-what-you-use checks as nightly build +* [ARROW-3459](https://issues.apache.org/jira/browse/ARROW-3459) - [C++][Gandiva] Add support for variable length output vectors +* [ARROW-3475](https://issues.apache.org/jira/browse/ARROW-3475) - [C++] Int64Builder.Finish(NumericArray) +* [ARROW-3572](https://issues.apache.org/jira/browse/ARROW-3572) - [Packaging] Correctly handle ssh origin urls for crossbow +* [ARROW-3671](https://issues.apache.org/jira/browse/ARROW-3671) - [Go] implement Interval array +* [ARROW-3676](https://issues.apache.org/jira/browse/ARROW-3676) - [Go] implement Decimal128 array +* [ARROW-3679](https://issues.apache.org/jira/browse/ARROW-3679) - [Go] implement IPC protocol +* [ARROW-3680](https://issues.apache.org/jira/browse/ARROW-3680) - [Go] implement Float16 array +* [ARROW-3686](https://issues.apache.org/jira/browse/ARROW-3686) - [Python] Support for masked arrays in to/from numpy +* [ARROW-3729](https://issues.apache.org/jira/browse/ARROW-3729) - [C++] Support for writing TIMESTAMP\_NANOS Parquet metadata +* [ARROW-3732](https://issues.apache.org/jira/browse/ARROW-3732) - [R] Add functions to write RecordBatch or Schema to Message value, then read back +* [ARROW-3758](https://issues.apache.org/jira/browse/ARROW-3758) - [R] Build R library on Windows, document build instructions for Windows developers +* [ARROW-3759](https://issues.apache.org/jira/browse/ARROW-3759) - [R][CI] Build and test on Windows in Appveyor +* [ARROW-3767](https://issues.apache.org/jira/browse/ARROW-3767) - [C++] Add cast for Null to any type +* [ARROW-3780](https://issues.apache.org/jira/browse/ARROW-3780) - [R] Failed to fetch data: invalid data when collecting int16 +* [ARROW-3791](https://issues.apache.org/jira/browse/ARROW-3791) - [C++] Add type inference for boolean values in CSV 
files +* [ARROW-3794](https://issues.apache.org/jira/browse/ARROW-3794) - [R] Consider mapping INT8 to integer() not raw() +* [ARROW-3804](https://issues.apache.org/jira/browse/ARROW-3804) - [R] Consider lowering required R runtime +* [ARROW-3810](https://issues.apache.org/jira/browse/ARROW-3810) - [R] type= argument for Array and ChunkedArray +* [ARROW-3811](https://issues.apache.org/jira/browse/ARROW-3811) - [R] struct arrays inference +* [ARROW-3814](https://issues.apache.org/jira/browse/ARROW-3814) - [R] RecordBatch$from\_arrays() +* [ARROW-3815](https://issues.apache.org/jira/browse/ARROW-3815) - [R] refine record batch factory +* [ARROW-3848](https://issues.apache.org/jira/browse/ARROW-3848) - [R] allow nbytes to be missing in RandomAccessFile$Read() +* [ARROW-3897](https://issues.apache.org/jira/browse/ARROW-3897) - [MATLAB] Add MATLAB support for writing numeric datatypes to a Feather file +* [ARROW-3904](https://issues.apache.org/jira/browse/ARROW-3904) - [C++/Python] Validate scale and precision of decimal128 type +* [ARROW-4013](https://issues.apache.org/jira/browse/ARROW-4013) - [Documentation][C++] Document how to build Apache Arrow on MSYS2 +* [ARROW-4020](https://issues.apache.org/jira/browse/ARROW-4020) - [Release] Remove source artifacts from dev dist system after release vote passes +* [ARROW-4036](https://issues.apache.org/jira/browse/ARROW-4036) - [C++] Make status codes pluggable +* [ARROW-4047](https://issues.apache.org/jira/browse/ARROW-4047) - [Python] Document use of int96 timestamps and options in Parquet docs +* [ARROW-4086](https://issues.apache.org/jira/browse/ARROW-4086) - [Java] Add apis to debug alloc failures +* [ARROW-4121](https://issues.apache.org/jira/browse/ARROW-4121) - [C++] Refactor memory allocation from InvertKernel +* [ARROW-4159](https://issues.apache.org/jira/browse/ARROW-4159) - [C++] Check for -Wdocumentation issues +* [ARROW-4194](https://issues.apache.org/jira/browse/ARROW-4194) - [Format] Metadata.rst does not specify timezone for Timestamp type +* [ARROW-4302](https://issues.apache.org/jira/browse/ARROW-4302) - [C++] Add OpenSSL to C++ build toolchain +* [ARROW-4337](https://issues.apache.org/jira/browse/ARROW-4337) - [C#] Array / RecordBatch Builder Fluent API +* [ARROW-4343](https://issues.apache.org/jira/browse/ARROW-4343) - [C++] Add as complete as possible Ubuntu Trusty / 14.04 build to docker-compose setup +* [ARROW-4356](https://issues.apache.org/jira/browse/ARROW-4356) - [CI] Add integration (docker) test for turbodbc +* [ARROW-4452](https://issues.apache.org/jira/browse/ARROW-4452) - [Python] Serializing sparse torch tensors +* [ARROW-4453](https://issues.apache.org/jira/browse/ARROW-4453) - [Python] Create Cython wrappers for SparseTensor +* [ARROW-4467](https://issues.apache.org/jira/browse/ARROW-4467) - [Rust] [DataFusion] Create a REPL & Dockerfile for DataFusion +* [ARROW-4503](https://issues.apache.org/jira/browse/ARROW-4503) - [C#] ArrowStreamReader allocates and copies data excessively +* [ARROW-4504](https://issues.apache.org/jira/browse/ARROW-4504) - [C++] Reduce the number of unit test executables +* [ARROW-4505](https://issues.apache.org/jira/browse/ARROW-4505) - [C++] Nicer PrettyPrint for date32 +* [ARROW-4566](https://issues.apache.org/jira/browse/ARROW-4566) - [C++][Flight] Add option to run arrow-flight-benchmark against a perf server running on a different host +* [ARROW-4596](https://issues.apache.org/jira/browse/ARROW-4596) - [Rust] [DataFusion] Implement COUNT aggregate function +* 
[ARROW-4622](https://issues.apache.org/jira/browse/ARROW-4622) - [C++] [Python] MakeDense and MakeSparse in UnionArray should accept a vector of Field +* [ARROW-4625](https://issues.apache.org/jira/browse/ARROW-4625) - [Flight] Wrap server busy-wait methods +* [ARROW-4626](https://issues.apache.org/jira/browse/ARROW-4626) - [Flight] Add application metadata field to DoGet +* [ARROW-4627](https://issues.apache.org/jira/browse/ARROW-4627) - [Flight] Add application metadata field to DoPut +* [ARROW-4701](https://issues.apache.org/jira/browse/ARROW-4701) - [C++] Add JSON chunker benchmarks +* [ARROW-4702](https://issues.apache.org/jira/browse/ARROW-4702) - [C++] Upgrade dependency versions +* [ARROW-4708](https://issues.apache.org/jira/browse/ARROW-4708) - [C++] Add multithreaded JSON reader +* [ARROW-4714](https://issues.apache.org/jira/browse/ARROW-4714) - [C++][Java] Providing JNI interface to Read ORC file via Arrow C++ +* [ARROW-4717](https://issues.apache.org/jira/browse/ARROW-4717) - [C#] Consider exposing ValueTask instead of Task +* [ARROW-4719](https://issues.apache.org/jira/browse/ARROW-4719) - [C#] Implement ChunkedArray, Column and Table in C# +* [ARROW-4741](https://issues.apache.org/jira/browse/ARROW-4741) - [Java] Add documentation to all classes and enable checkstyle for class javadocs +* [ARROW-4787](https://issues.apache.org/jira/browse/ARROW-4787) - [C++] Include "null" values (perhaps with an option to toggle on/off) in hash kernel actions +* [ARROW-4788](https://issues.apache.org/jira/browse/ARROW-4788) - [C++] Develop less verbose API for constructing StructArray +* [ARROW-4800](https://issues.apache.org/jira/browse/ARROW-4800) - [C++] Create/port a StatusOr implementation to be able to return a status or a type +* [ARROW-4805](https://issues.apache.org/jira/browse/ARROW-4805) - [Rust] Write temporal arrays to CSV +* [ARROW-4806](https://issues.apache.org/jira/browse/ARROW-4806) - [Rust] Support casting temporal arrays in cast kernels +* [ARROW-4824](https://issues.apache.org/jira/browse/ARROW-4824) - [Python] read\_csv should accept io.StringIO objects +* [ARROW-4827](https://issues.apache.org/jira/browse/ARROW-4827) - [C++] Implement benchmark comparison between two git revisions +* [ARROW-4847](https://issues.apache.org/jira/browse/ARROW-4847) - [Python] Add pyarrow.table factory function that dispatches to various ctors based on type of input +* [ARROW-4904](https://issues.apache.org/jira/browse/ARROW-4904) - [C++] Move implementations in arrow/ipc/test-common.h into libarrow\_testing +* [ARROW-4911](https://issues.apache.org/jira/browse/ARROW-4911) - [R] Support for building package for Windows +* [ARROW-4912](https://issues.apache.org/jira/browse/ARROW-4912) - [C++, Python] Allow specifying column names to CSV reader +* [ARROW-4913](https://issues.apache.org/jira/browse/ARROW-4913) - [Java][Memory] Limit number of ledgers and arrowbufs +* [ARROW-4945](https://issues.apache.org/jira/browse/ARROW-4945) - [Flight] Enable Flight integration tests in Travis +* [ARROW-4956](https://issues.apache.org/jira/browse/ARROW-4956) - [C#] Allow ArrowBuffers to wrap external Memory in C# +* [ARROW-4959](https://issues.apache.org/jira/browse/ARROW-4959) - [Gandiva][Crossbow] Builds broken +* [ARROW-4968](https://issues.apache.org/jira/browse/ARROW-4968) - [Rust] StructArray builder and From<> methods should check that field types match schema +* [ARROW-4971](https://issues.apache.org/jira/browse/ARROW-4971) - [Go] DataType equality +* 
[ARROW-4972](https://issues.apache.org/jira/browse/ARROW-4972) - [Go] Array equality +* [ARROW-4973](https://issues.apache.org/jira/browse/ARROW-4973) - [Go] Slice Array equality +* [ARROW-4974](https://issues.apache.org/jira/browse/ARROW-4974) - [Go] Array approx equality +* [ARROW-4990](https://issues.apache.org/jira/browse/ARROW-4990) - [C++] Kernel to compare array with array +* [ARROW-4993](https://issues.apache.org/jira/browse/ARROW-4993) - [C++] Display summary at the end of CMake configuration +* [ARROW-5000](https://issues.apache.org/jira/browse/ARROW-5000) - [Python] Fix deprecation warning from setup.py +* [ARROW-5007](https://issues.apache.org/jira/browse/ARROW-5007) - [C++] Move DCHECK out of sse-utils +* [ARROW-5020](https://issues.apache.org/jira/browse/ARROW-5020) - [C++][Gandiva] Split Gandiva-related conda packages for builds into separate .yml conda env file +* [ARROW-5027](https://issues.apache.org/jira/browse/ARROW-5027) - [Python] Add JSON Reader +* [ARROW-5038](https://issues.apache.org/jira/browse/ARROW-5038) - [Rust] [DataFusion] Implement AVG aggregate function +* [ARROW-5039](https://issues.apache.org/jira/browse/ARROW-5039) - [Rust] [DataFusion] Fix bugs in CAST support +* [ARROW-5045](https://issues.apache.org/jira/browse/ARROW-5045) - [Rust] Code coverage silently failing in CI +* [ARROW-5053](https://issues.apache.org/jira/browse/ARROW-5053) - [Rust] [DataFusion] Use env var for location of arrow test data +* [ARROW-5054](https://issues.apache.org/jira/browse/ARROW-5054) - [C++][Release] Test Flight in verify-release-candidate.sh +* [ARROW-5056](https://issues.apache.org/jira/browse/ARROW-5056) - [Packaging] Adjust conda recipes to use ORC conda-forge package on unix systems +* [ARROW-5061](https://issues.apache.org/jira/browse/ARROW-5061) - [Release] Improve 03-binary performance +* [ARROW-5062](https://issues.apache.org/jira/browse/ARROW-5062) - [Java] Shade Java Guava dependency for Flight +* [ARROW-5063](https://issues.apache.org/jira/browse/ARROW-5063) - [Java] FlightClient should not create a child allocator +* [ARROW-5064](https://issues.apache.org/jira/browse/ARROW-5064) - [Release] Pass PKG\_CONFIG\_PATH to glib in the verification script +* [ARROW-5066](https://issues.apache.org/jira/browse/ARROW-5066) - [Integration] Add flags to enable/disable implementations in integration/integration\_test.py +* [ARROW-5071](https://issues.apache.org/jira/browse/ARROW-5071) - [Benchmarking] Performs a benchmark run with archery +* [ARROW-5076](https://issues.apache.org/jira/browse/ARROW-5076) - [Packaging] Improve post binary upload performance +* [ARROW-5077](https://issues.apache.org/jira/browse/ARROW-5077) - [Rust] Release process should change Cargo.toml to use release versions +* [ARROW-5078](https://issues.apache.org/jira/browse/ARROW-5078) - [Documentation] Sphinx is failed by RemovedInSphinx30Warning +* [ARROW-5079](https://issues.apache.org/jira/browse/ARROW-5079) - [Release] Add a script to release C# package +* [ARROW-5080](https://issues.apache.org/jira/browse/ARROW-5080) - [Release] Add a script to release Rust packages +* [ARROW-5081](https://issues.apache.org/jira/browse/ARROW-5081) - [C++] Consistently use PATH\_SUFFIXES in CMake config +* [ARROW-5082](https://issues.apache.org/jira/browse/ARROW-5082) - [Python][Packaging] Reduce size of macOS and manylinux1 wheels +* [ARROW-5083](https://issues.apache.org/jira/browse/ARROW-5083) - [Developer] In merge\_arrow\_pr.py script, allow user to set a released Fix Version +* 
[ARROW-5088](https://issues.apache.org/jira/browse/ARROW-5088) - [C++] Do not set -Werror when using BUILD\_WARNING\_LEVEL=CHECKIN in release mode +* [ARROW-5091](https://issues.apache.org/jira/browse/ARROW-5091) - [Flight] Rename FlightGetInfo message to FlightInfo +* [ARROW-5093](https://issues.apache.org/jira/browse/ARROW-5093) - [Packaging] Add support for selective binary upload +* [ARROW-5094](https://issues.apache.org/jira/browse/ARROW-5094) - [Packaging] Add APT/Yum verification scripts +* [ARROW-5102](https://issues.apache.org/jira/browse/ARROW-5102) - [C++] Reduce header dependencies +* [ARROW-5108](https://issues.apache.org/jira/browse/ARROW-5108) - [Go] implement reading primitive arrays from Arrow file +* [ARROW-5109](https://issues.apache.org/jira/browse/ARROW-5109) - [Go] implement reading binary/string arrays from Arrow file +* [ARROW-5110](https://issues.apache.org/jira/browse/ARROW-5110) - [Go] implement reading struct arrays from Arrow file +* [ARROW-5111](https://issues.apache.org/jira/browse/ARROW-5111) - [Go] implement reading list arrays from Arrow file +* [ARROW-5112](https://issues.apache.org/jira/browse/ARROW-5112) - [Go] implement writing arrays to Arrow file +* [ARROW-5113](https://issues.apache.org/jira/browse/ARROW-5113) - [C++][Flight] Unit tests in C++ for DoPut +* [ARROW-5115](https://issues.apache.org/jira/browse/ARROW-5115) - [JS] Implement the Vector Builders +* [ARROW-5116](https://issues.apache.org/jira/browse/ARROW-5116) - [Rust] move kernel related files under compute/kernels +* [ARROW-5124](https://issues.apache.org/jira/browse/ARROW-5124) - [C++] Add support for Parquet in MinGW build +* [ARROW-5126](https://issues.apache.org/jira/browse/ARROW-5126) - [Rust] [Parquet] Convert parquet column desc to arrow data type +* [ARROW-5127](https://issues.apache.org/jira/browse/ARROW-5127) - [Rust] [Parquet] Add page iterator +* [ARROW-5136](https://issues.apache.org/jira/browse/ARROW-5136) - [Flight] Implement call options (timeouts) +* [ARROW-5137](https://issues.apache.org/jira/browse/ARROW-5137) - [Flight] Implement authentication APIs +* [ARROW-5145](https://issues.apache.org/jira/browse/ARROW-5145) - [C++] Release mode lacks convenience input validation +* [ARROW-5150](https://issues.apache.org/jira/browse/ARROW-5150) - [Ruby] Add Arrow::Table#raw\_records +* [ARROW-5155](https://issues.apache.org/jira/browse/ARROW-5155) - [GLib][Ruby] Add support for building union arrays from data type +* [ARROW-5157](https://issues.apache.org/jira/browse/ARROW-5157) - [Website] Add MATLAB to powered by Apache Arrow page +* [ARROW-5162](https://issues.apache.org/jira/browse/ARROW-5162) - [Rust] [Parquet] Rename mod reader to arrow. +* [ARROW-5163](https://issues.apache.org/jira/browse/ARROW-5163) - [Gandiva] Cast timestamp/date are incorrectly evaluating year 0097 to 1997 +* [ARROW-5164](https://issues.apache.org/jira/browse/ARROW-5164) - [Gandiva] [C++] Introduce 32bit hash functions +* [ARROW-5165](https://issues.apache.org/jira/browse/ARROW-5165) - [Python][Documentation] Build docs don't suggest assigning $ARROW\_BUILD\_TYPE +* [ARROW-5168](https://issues.apache.org/jira/browse/ARROW-5168) - [GLib] Add garrow\_array\_take() +* [ARROW-5171](https://issues.apache.org/jira/browse/ARROW-5171) - [C++] Use LESS instead of LOWER in compare enum option. 
+* [ARROW-5172](https://issues.apache.org/jira/browse/ARROW-5172) - [Go] implement reading fixed-size binary arrays from Arrow file +* [ARROW-5178](https://issues.apache.org/jira/browse/ARROW-5178) - [Python] Allow creating Table from Python dict +* [ARROW-5179](https://issues.apache.org/jira/browse/ARROW-5179) - [Python] Return plain dicts, not OrderedDict, on Python 3.7+ +* [ARROW-5185](https://issues.apache.org/jira/browse/ARROW-5185) - [C++] Add support for Boost with CMake configuration file +* [ARROW-5187](https://issues.apache.org/jira/browse/ARROW-5187) - [Rust] Ability to flatten StructArray into a RecordBatch +* [ARROW-5188](https://issues.apache.org/jira/browse/ARROW-5188) - [Rust] Add temporal builders for StructArray +* [ARROW-5189](https://issues.apache.org/jira/browse/ARROW-5189) - [Rust] [Parquet] Format individual fields within a parquet row +* [ARROW-5190](https://issues.apache.org/jira/browse/ARROW-5190) - [R] Discussion: tibble dependency in R package +* [ARROW-5191](https://issues.apache.org/jira/browse/ARROW-5191) - [Rust] Expose CSV and JSON reader schemas +* [ARROW-5203](https://issues.apache.org/jira/browse/ARROW-5203) - [GLib] Add support for Compare filter +* [ARROW-5204](https://issues.apache.org/jira/browse/ARROW-5204) - [C++] Improve BufferBuilder performance +* [ARROW-5212](https://issues.apache.org/jira/browse/ARROW-5212) - [Go] Array BinaryBuilder in Go library has no access to resize the values buffer +* [ARROW-5218](https://issues.apache.org/jira/browse/ARROW-5218) - [C++] Improve build when third-party library locations are specified +* [ARROW-5219](https://issues.apache.org/jira/browse/ARROW-5219) - [C++] Build protobuf\_ep in parallel when using Ninja +* [ARROW-5222](https://issues.apache.org/jira/browse/ARROW-5222) - [Python] Issues with installing pyarrow for development on MacOS +* [ARROW-5225](https://issues.apache.org/jira/browse/ARROW-5225) - [Java] Improve performance of BaseValueVector#getValidityBufferSizeFromCount +* [ARROW-5226](https://issues.apache.org/jira/browse/ARROW-5226) - [Gandiva] support compare operators for decimal +* [ARROW-5238](https://issues.apache.org/jira/browse/ARROW-5238) - [Python] Improve usability of pyarrow.dictionary function +* [ARROW-5241](https://issues.apache.org/jira/browse/ARROW-5241) - [Python] Add option to disable writing statistics to parquet file +* [ARROW-5250](https://issues.apache.org/jira/browse/ARROW-5250) - [Java] remove javadoc suppression on methods. 
+* [ARROW-5252](https://issues.apache.org/jira/browse/ARROW-5252) - [C++] Change variant implementation +* [ARROW-5256](https://issues.apache.org/jira/browse/ARROW-5256) - [Packaging][deb] Failed to build with LLVM 7.1.0 +* [ARROW-5257](https://issues.apache.org/jira/browse/ARROW-5257) - [Website] Update site to use "official" Apache Arrow logo, add clearly marked links to logo +* [ARROW-5258](https://issues.apache.org/jira/browse/ARROW-5258) - [C++/Python] Expose file metadata of dataset pieces to caller +* [ARROW-5261](https://issues.apache.org/jira/browse/ARROW-5261) - [C++] Finish implementation of scalar types for Duration and Interval +* [ARROW-5262](https://issues.apache.org/jira/browse/ARROW-5262) - [Python] Fix typo +* [ARROW-5264](https://issues.apache.org/jira/browse/ARROW-5264) - [Java] Allow enabling/disabling boundary checking by environmental variable +* [ARROW-5266](https://issues.apache.org/jira/browse/ARROW-5266) - [Go] implement read/write IPC for Float16 +* [ARROW-5268](https://issues.apache.org/jira/browse/ARROW-5268) - [GLib] Add GArrowJSONReader +* [ARROW-5269](https://issues.apache.org/jira/browse/ARROW-5269) - [C++] Whitelist benchmarks candidates for regression checks +* [ARROW-5275](https://issues.apache.org/jira/browse/ARROW-5275) - [C++] Write generic filesystem tests +* [ARROW-5281](https://issues.apache.org/jira/browse/ARROW-5281) - [Rust] [Parquet] Move DataPageBuilder to test\_common +* [ARROW-5284](https://issues.apache.org/jira/browse/ARROW-5284) - [Rust] Replace libc with std::alloc for memory allocation +* [ARROW-5286](https://issues.apache.org/jira/browse/ARROW-5286) - [Python] support Structs in Table.from\_pandas given a known schema +* [ARROW-5288](https://issues.apache.org/jira/browse/ARROW-5288) - [Documentation] Enrich the contribution guidelines +* [ARROW-5289](https://issues.apache.org/jira/browse/ARROW-5289) - [C++] Move arrow/util/concatenate.h to arrow/array/ +* [ARROW-5290](https://issues.apache.org/jira/browse/ARROW-5290) - [Java] Provide a flag to enable/disable null-checking in vectors' get methods +* [ARROW-5291](https://issues.apache.org/jira/browse/ARROW-5291) - [Python] Add wrapper for "take" kernel on Array +* [ARROW-5298](https://issues.apache.org/jira/browse/ARROW-5298) - [Rust] Add debug implementation for Buffer +* [ARROW-5299](https://issues.apache.org/jira/browse/ARROW-5299) - [C++] ListArray comparison is incorrect +* [ARROW-5309](https://issues.apache.org/jira/browse/ARROW-5309) - [Python] Add clarifications to Python "append" methods that return new objects +* [ARROW-5311](https://issues.apache.org/jira/browse/ARROW-5311) - [C++] Return more specific invalid Status in Take kernel +* [ARROW-5313](https://issues.apache.org/jira/browse/ARROW-5313) - [Format] Comments on Field table are a bit confusing +* [ARROW-5317](https://issues.apache.org/jira/browse/ARROW-5317) - [Rust] [Parquet] impl IntoIterator for SerializedFileReader +* [ARROW-5319](https://issues.apache.org/jira/browse/ARROW-5319) - [CI] Enable ccache with MinGW builds +* [ARROW-5321](https://issues.apache.org/jira/browse/ARROW-5321) - [Gandiva][C++] add isnull and isnotnull for utf8 and binary types +* [ARROW-5323](https://issues.apache.org/jira/browse/ARROW-5323) - [CI] Use compression with clcache +* [ARROW-5328](https://issues.apache.org/jira/browse/ARROW-5328) - [R] Add shell scripts to do a full package rebuild and test locally +* [ARROW-5329](https://issues.apache.org/jira/browse/ARROW-5329) - Add support for building MATLAB interface to Feather directly 
within MATLAB +* [ARROW-5334](https://issues.apache.org/jira/browse/ARROW-5334) - [C++] Add "Type" to names of arrow::Integer, arrow::FloatingPoint classes for consistency +* [ARROW-5335](https://issues.apache.org/jira/browse/ARROW-5335) - [Python] Raise on variable dictionaries when converting to pandas +* [ARROW-5339](https://issues.apache.org/jira/browse/ARROW-5339) - [C++] Add jemalloc to thirdparty dependency download script +* [ARROW-5341](https://issues.apache.org/jira/browse/ARROW-5341) - [C++] Add instructions about fixing and testing for -Wdocumentation clang warnings locally +* [ARROW-5342](https://issues.apache.org/jira/browse/ARROW-5342) - [Format] Formalize extension type metadata in IPC protocol +* [ARROW-5346](https://issues.apache.org/jira/browse/ARROW-5346) - [C++] Revert changes to qualify duration in vendored date code +* [ARROW-5349](https://issues.apache.org/jira/browse/ARROW-5349) - [Python/C++] Provide a way to specify the file path in parquet ColumnChunkMetaData +* [ARROW-5361](https://issues.apache.org/jira/browse/ARROW-5361) - [R] Follow DictionaryType/DictionaryArray changes from ARROW-3144 +* [ARROW-5363](https://issues.apache.org/jira/browse/ARROW-5363) - [GLib] Fix coding styles +* [ARROW-5364](https://issues.apache.org/jira/browse/ARROW-5364) - [C++] Use ASCII rather than UTF-8 in BuildUtils.cmake comment +* [ARROW-5365](https://issues.apache.org/jira/browse/ARROW-5365) - [C++][CI] Add UBSan and ASAN into CI +* [ARROW-5368](https://issues.apache.org/jira/browse/ARROW-5368) - [C++] Disable jemalloc by default with MinGW +* [ARROW-5369](https://issues.apache.org/jira/browse/ARROW-5369) - [C++] Add support for glog on Windows +* [ARROW-5370](https://issues.apache.org/jira/browse/ARROW-5370) - [C++] Detect system uriparser by default +* [ARROW-5372](https://issues.apache.org/jira/browse/ARROW-5372) - [GLib] Add support for null/boolean values CSV read option +* [ARROW-5378](https://issues.apache.org/jira/browse/ARROW-5378) - [C++] Add local FileSystem implementation +* [ARROW-5380](https://issues.apache.org/jira/browse/ARROW-5380) - [C++] Fix and enable UBSan for unaligned accesses. 
+* [ARROW-5384](https://issues.apache.org/jira/browse/ARROW-5384) - [Go] add FixedSizeList array +* [ARROW-5389](https://issues.apache.org/jira/browse/ARROW-5389) - [C++] Add an internal temporary directory API +* [ARROW-5392](https://issues.apache.org/jira/browse/ARROW-5392) - [C++][CI][MinGW] Disable static library build on AppVeyor +* [ARROW-5393](https://issues.apache.org/jira/browse/ARROW-5393) - [R] Add tests and example for read\_parquet() +* [ARROW-5395](https://issues.apache.org/jira/browse/ARROW-5395) - [C++] Utilize stream EOS in File format +* [ARROW-5396](https://issues.apache.org/jira/browse/ARROW-5396) - [JS] Ensure reader and writer support files and streams with no RecordBatches +* [ARROW-5401](https://issues.apache.org/jira/browse/ARROW-5401) - [CI] [C++] Print ccache statistics on Travis-CI +* [ARROW-5404](https://issues.apache.org/jira/browse/ARROW-5404) - [C++] nonstd::string\_view conflicts with std::string\_view in c++17 +* [ARROW-5407](https://issues.apache.org/jira/browse/ARROW-5407) - [C++] Integration test Travis CI entry builds many unnecessary targets +* [ARROW-5413](https://issues.apache.org/jira/browse/ARROW-5413) - [C++] CSV reader doesn't remove BOM +* [ARROW-5415](https://issues.apache.org/jira/browse/ARROW-5415) - [Release] Release script should update R version everywhere +* [ARROW-5416](https://issues.apache.org/jira/browse/ARROW-5416) - [Website] Add Homebrew to project installation page +* [ARROW-5418](https://issues.apache.org/jira/browse/ARROW-5418) - [CI][R] Run code coverage and report to codecov.io +* [ARROW-5420](https://issues.apache.org/jira/browse/ARROW-5420) - [Java] Implement or remove getCurrentSizeInBytes in VariableWidthVector +* [ARROW-5427](https://issues.apache.org/jira/browse/ARROW-5427) - [Python] RangeIndex serialization change implications +* [ARROW-5428](https://issues.apache.org/jira/browse/ARROW-5428) - [C++] Add option to set "read extent" in arrow::io::BufferedInputStream +* [ARROW-5429](https://issues.apache.org/jira/browse/ARROW-5429) - [Java] Provide alternative buffer allocation policy +* [ARROW-5432](https://issues.apache.org/jira/browse/ARROW-5432) - [Python] Add 'read\_at' method to pyarrow.NativeFile +* [ARROW-5433](https://issues.apache.org/jira/browse/ARROW-5433) - [C++][Parquet] improve parquet-reader columns information +* [ARROW-5434](https://issues.apache.org/jira/browse/ARROW-5434) - [Java] Introduce wrappers for backward compatibility for ArrowBuf changes in ARROW-3191 +* [ARROW-5436](https://issues.apache.org/jira/browse/ARROW-5436) - [Python] expose filters argument in parquet.read\_table +* [ARROW-5438](https://issues.apache.org/jira/browse/ARROW-5438) - [JS] Utilize stream EOS in File format +* [ARROW-5441](https://issues.apache.org/jira/browse/ARROW-5441) - [C++] Implement FindArrowFlight.cmake +* [ARROW-5442](https://issues.apache.org/jira/browse/ARROW-5442) - [Website] Clarify what makes a release artifact "official" +* [ARROW-5443](https://issues.apache.org/jira/browse/ARROW-5443) - [Gandiva][Crossbow] Turn parquet encryption off +* [ARROW-5447](https://issues.apache.org/jira/browse/ARROW-5447) - [CI] [Ruby] CI is failed on AppVeyor +* [ARROW-5449](https://issues.apache.org/jira/browse/ARROW-5449) - [C++] Local filesystem implementation: investigate Windows UNC paths +* [ARROW-5451](https://issues.apache.org/jira/browse/ARROW-5451) - [C++][Gandiva] Add round functions for decimals +* [ARROW-5452](https://issues.apache.org/jira/browse/ARROW-5452) - [R] Add documentation website (pkgdown) +* 
[ARROW-5461](https://issues.apache.org/jira/browse/ARROW-5461) - [Java] Add micro-benchmarks for Float8Vector and allocators +* [ARROW-5463](https://issues.apache.org/jira/browse/ARROW-5463) - [Rust] Implement AsRef for Buffer +* [ARROW-5464](https://issues.apache.org/jira/browse/ARROW-5464) - [Archery] Bad --benchmark-filter default +* [ARROW-5465](https://issues.apache.org/jira/browse/ARROW-5465) - [Crossbow] Support writing submitted job definition yaml to a file +* [ARROW-5466](https://issues.apache.org/jira/browse/ARROW-5466) - [Java] Dockerize Java builds in Travis CI, run multiple JDKs in single entry +* [ARROW-5467](https://issues.apache.org/jira/browse/ARROW-5467) - [Go] implement read/write IPC for Time32/Time64 arrays +* [ARROW-5468](https://issues.apache.org/jira/browse/ARROW-5468) - [Go] implement read/write IPC for Timestamp arrays +* [ARROW-5469](https://issues.apache.org/jira/browse/ARROW-5469) - [Go] implement read/write IPC for Date32/Date64 arrays +* [ARROW-5470](https://issues.apache.org/jira/browse/ARROW-5470) - [CI] C++ local filesystem patch breaks Travis R job +* [ARROW-5472](https://issues.apache.org/jira/browse/ARROW-5472) - [Development] Add warning to PR merge tool if no JIRA component is set +* [ARROW-5474](https://issues.apache.org/jira/browse/ARROW-5474) - [C++] Document required Boost version +* [ARROW-5476](https://issues.apache.org/jira/browse/ARROW-5476) - [Java][Memory] Fix Netty ArrowBuf Slice +* [ARROW-5477](https://issues.apache.org/jira/browse/ARROW-5477) - [C++] Check required RapidJSON version +* [ARROW-5478](https://issues.apache.org/jira/browse/ARROW-5478) - [Packaging] Drop Ubuntu 14.04 support +* [ARROW-5481](https://issues.apache.org/jira/browse/ARROW-5481) - [GLib] garrow\_seekable\_input\_stream\_peek() misses "error" parameter document +* [ARROW-5483](https://issues.apache.org/jira/browse/ARROW-5483) - [Java] add ValueVector constructors that take a Field object +* [ARROW-5485](https://issues.apache.org/jira/browse/ARROW-5485) - [Gandiva][Crossbow] OSx builds failing +* [ARROW-5486](https://issues.apache.org/jira/browse/ARROW-5486) - [GLib] Add binding of gandiva::FunctionRegistry and related things +* [ARROW-5488](https://issues.apache.org/jira/browse/ARROW-5488) - [R] Workaround when C++ lib not available +* [ARROW-5490](https://issues.apache.org/jira/browse/ARROW-5490) - [C++] Remove ARROW\_BOOST\_HEADER\_ONLY +* [ARROW-5491](https://issues.apache.org/jira/browse/ARROW-5491) - [C++] Remove unecessary semicolons following MACRO definitions +* [ARROW-5492](https://issues.apache.org/jira/browse/ARROW-5492) - [R] Add "col\_select" argument to read\_\* functions to read subset of columns +* [ARROW-5495](https://issues.apache.org/jira/browse/ARROW-5495) - [C++] Use HTTPS consistently for downloading dependencies +* [ARROW-5496](https://issues.apache.org/jira/browse/ARROW-5496) - [R][CI] Fix relative paths in R codecov.io reporting +* [ARROW-5498](https://issues.apache.org/jira/browse/ARROW-5498) - [C++] Build failure with Flatbuffers 1.11.0 and MinGW +* [ARROW-5500](https://issues.apache.org/jira/browse/ARROW-5500) - [R] read\_csv\_arrow() signature should match readr::read\_csv() +* [ARROW-5503](https://issues.apache.org/jira/browse/ARROW-5503) - [R] add read\_json() +* [ARROW-5504](https://issues.apache.org/jira/browse/ARROW-5504) - [R] move use\_threads argument to global option +* [ARROW-5509](https://issues.apache.org/jira/browse/ARROW-5509) - [R] write\_parquet() +* [ARROW-5511](https://issues.apache.org/jira/browse/ARROW-5511) - 
[Packaging] Enable Flight in Conda packages
+* [ARROW-5512](https://issues.apache.org/jira/browse/ARROW-5512) - [C++] Draft initial public APIs for Datasets project
+* [ARROW-5513](https://issues.apache.org/jira/browse/ARROW-5513) - [Java] Refactor method name for getstartOffset to use camel case
+* [ARROW-5516](https://issues.apache.org/jira/browse/ARROW-5516) - [Python] Development page for pyarrow has a missing dependency in using pip
+* [ARROW-5518](https://issues.apache.org/jira/browse/ARROW-5518) - [Java] Set VectorSchemaRoot rowCount to 0 on allocateNew and clear
+* [ARROW-5524](https://issues.apache.org/jira/browse/ARROW-5524) - [C++] Turn off PARQUET\_BUILD\_ENCRYPTION in CMake if OpenSSL not found
+* [ARROW-5526](https://issues.apache.org/jira/browse/ARROW-5526) - [Developer] Add more prominent notice to GitHub issue template to direct bug reports to JIRA
+* [ARROW-5529](https://issues.apache.org/jira/browse/ARROW-5529) - [Flight] Allow serving with multiple TLS certificates
+* [ARROW-5531](https://issues.apache.org/jira/browse/ARROW-5531) - [Python] Support binary, utf8, and nested types in Array.from\_buffers
+* [ARROW-5533](https://issues.apache.org/jira/browse/ARROW-5533) - [Plasma] Plasma client should be thread-safe
+* [ARROW-5534](https://issues.apache.org/jira/browse/ARROW-5534) - [GLib] Add garrow\_table\_concatenate()
+* [ARROW-5535](https://issues.apache.org/jira/browse/ARROW-5535) - [GLib] Add garrow\_table\_slice()
+* [ARROW-5537](https://issues.apache.org/jira/browse/ARROW-5537) - [JS] Support delta dictionaries in RecordBatchWriter and DictionaryBuilder
+* [ARROW-5538](https://issues.apache.org/jira/browse/ARROW-5538) - [C++] Restrict minimum OpenSSL version to 1.0.2
+* [ARROW-5541](https://issues.apache.org/jira/browse/ARROW-5541) - [R] casts from negative int32 to uint32 and uint64 are now safe
+* [ARROW-5544](https://issues.apache.org/jira/browse/ARROW-5544) - [Archery] should not return non-zero in \`benchmark diff\` sub command on regression
+* [ARROW-5545](https://issues.apache.org/jira/browse/ARROW-5545) - [C++][Docs] Clarify expectation of UTC values for timestamps with time zones in C++ API docs
+* [ARROW-5547](https://issues.apache.org/jira/browse/ARROW-5547) - [C++][FlightRPC] arrow-flight.pc isn't provided
+* [ARROW-5552](https://issues.apache.org/jira/browse/ARROW-5552) - [Go] make Schema and Field implement Stringer
+* [ARROW-5554](https://issues.apache.org/jira/browse/ARROW-5554) - Add a python wrapper for arrow::Concatenate
+* [ARROW-5555](https://issues.apache.org/jira/browse/ARROW-5555) - [R] Add install\_arrow() function to assist the user in obtaining C++ runtime libraries
+* [ARROW-5556](https://issues.apache.org/jira/browse/ARROW-5556) - [Doc] Document JSON reader
+* [ARROW-5557](https://issues.apache.org/jira/browse/ARROW-5557) - [C++] Investigate performance of VisitBitsUnrolled on different platforms
+* [ARROW-5564](https://issues.apache.org/jira/browse/ARROW-5564) - [C++] Add uriparser to conda-forge
+* [ARROW-5565](https://issues.apache.org/jira/browse/ARROW-5565) - [Python] Document how to use gdb when working on pyarrow
+* [ARROW-5567](https://issues.apache.org/jira/browse/ARROW-5567) - [C++] Fix build error of memory-benchmark
+* [ARROW-5574](https://issues.apache.org/jira/browse/ARROW-5574) - [R] documentation error for read\_arrow()
+* [ARROW-5580](https://issues.apache.org/jira/browse/ARROW-5580) - Correct definitions of timestamp functions in Gandiva
+* [ARROW-5581](https://issues.apache.org/jira/browse/ARROW-5581) - [Java] Provide interfaces and initial implementations for vector sorting
+* [ARROW-5582](https://issues.apache.org/jira/browse/ARROW-5582) - [Go] add support for comparing Records
+* [ARROW-5586](https://issues.apache.org/jira/browse/ARROW-5586) - [R] convert Array of LIST type to R lists
+* [ARROW-5587](https://issues.apache.org/jira/browse/ARROW-5587) - [Java] Add more maven style check for Java code
+* [ARROW-5590](https://issues.apache.org/jira/browse/ARROW-5590) - [R] Run "no libarrow" R build in the same CI entry if possible
+* [ARROW-5591](https://issues.apache.org/jira/browse/ARROW-5591) - [Go] implement read/write IPC for Duration & Intervals
+* [ARROW-5597](https://issues.apache.org/jira/browse/ARROW-5597) - [Packaging][deb] Add Flight packages
+* [ARROW-5600](https://issues.apache.org/jira/browse/ARROW-5600) - [R] R package namespace cleanup
+* [ARROW-5602](https://issues.apache.org/jira/browse/ARROW-5602) - [Java][Gandiva] Add test for decimal round functions
+* [ARROW-5604](https://issues.apache.org/jira/browse/ARROW-5604) - [Go] improve test coverage of type-traits
+* [ARROW-5609](https://issues.apache.org/jira/browse/ARROW-5609) - [C++] Set CMP0068 CMake policy to avoid macOS warnings
+* [ARROW-5612](https://issues.apache.org/jira/browse/ARROW-5612) - [Python][Documentation] Clarify date\_as\_object option behavior
+* [ARROW-5621](https://issues.apache.org/jira/browse/ARROW-5621) - [Go] implement read/write IPC for Decimal128 arrays
+* [ARROW-5622](https://issues.apache.org/jira/browse/ARROW-5622) - [C++][Dataset] arrow-dataset.pc isn't provided
+* [ARROW-5625](https://issues.apache.org/jira/browse/ARROW-5625) - [R] convert Array of struct type to data frame columns
+* [ARROW-5632](https://issues.apache.org/jira/browse/ARROW-5632) - [Doc] Add some documentation describing compile/debug workflow on macOS with Xcode IDE
+* [ARROW-5633](https://issues.apache.org/jira/browse/ARROW-5633) - [Python] Enable bz2 in Linux wheels
+* [ARROW-5635](https://issues.apache.org/jira/browse/ARROW-5635) - [C++] Support "compacting" a table
+* [ARROW-5637](https://issues.apache.org/jira/browse/ARROW-5637) - [Gandiva] [Java] Complete IN Expression
+* [ARROW-5639](https://issues.apache.org/jira/browse/ARROW-5639) - [Java] Remove floating point computation from getOffsetBufferValueCapacity
+* [ARROW-5641](https://issues.apache.org/jira/browse/ARROW-5641) - [GLib] Remove enums files generated by GNU Autotools from Git targets
+* [ARROW-5643](https://issues.apache.org/jira/browse/ARROW-5643) - [Flight] Add ability to override hostname checking
+* [ARROW-5650](https://issues.apache.org/jira/browse/ARROW-5650) - [Python] Update manylinux dependency versions
+* [ARROW-5652](https://issues.apache.org/jira/browse/ARROW-5652) - [CI] Fix iwyu docker image
+* [ARROW-5656](https://issues.apache.org/jira/browse/ARROW-5656) - [Python] Enable Flight wheels on macOS
+* [ARROW-5659](https://issues.apache.org/jira/browse/ARROW-5659) - [C++] Add support for finding OpenSSL installed by Homebrew
+* [ARROW-5660](https://issues.apache.org/jira/browse/ARROW-5660) - [GLib][CI] Use the latest macOS image and all Homebrew based libraries
+* [ARROW-5661](https://issues.apache.org/jira/browse/ARROW-5661) - Support hash functions for decimal in Gandiva
+* [ARROW-5662](https://issues.apache.org/jira/browse/ARROW-5662) - [C++] Add support for BOOST\_SOURCE=AUTO|BUNDLED|SYSTEM
+* [ARROW-5663](https://issues.apache.org/jira/browse/ARROW-5663) - [Packaging][RPM] Update CentOS packages for 0.14.0
+* [ARROW-5664](https://issues.apache.org/jira/browse/ARROW-5664) - [Crossbow] Execute nightly crossbow tests on CircleCI instead of Travis
+* [ARROW-5668](https://issues.apache.org/jira/browse/ARROW-5668) - [Python] Display "not null" in Schema.\_\_repr\_\_ for non-nullable fields
+* [ARROW-5669](https://issues.apache.org/jira/browse/ARROW-5669) - [Crossbow] manylinux1 wheel building failing
+* [ARROW-5670](https://issues.apache.org/jira/browse/ARROW-5670) - [Crossbow] get\_apache\_mirror.py fails with TLS error on macOS with Python 3.5
+* [ARROW-5671](https://issues.apache.org/jira/browse/ARROW-5671) - [crossbow] mac os python wheels failing
+* [ARROW-5672](https://issues.apache.org/jira/browse/ARROW-5672) - [Java] Refactor redundant method modifier
+* [ARROW-5683](https://issues.apache.org/jira/browse/ARROW-5683) - [R] Add snappy to Rtools Windows builds
+* [ARROW-5684](https://issues.apache.org/jira/browse/ARROW-5684) - [Packaging][deb] Add support for Ubuntu 19.04
+* [ARROW-5685](https://issues.apache.org/jira/browse/ARROW-5685) - [Packaging][deb] Add support for Apache Arrow Datasets
+* [ARROW-5687](https://issues.apache.org/jira/browse/ARROW-5687) - [C++] Remove remaining uses of ARROW\_BOOST\_VENDORED
+* [ARROW-5690](https://issues.apache.org/jira/browse/ARROW-5690) - [Packaging][Python] macOS wheels broken: libprotobuf.18.dylib missing
+* [ARROW-5694](https://issues.apache.org/jira/browse/ARROW-5694) - [Python] List of decimals are not supported when converting to pandas
+* [ARROW-5695](https://issues.apache.org/jira/browse/ARROW-5695) - [C#][Release] Run sourcelink test in verify-release-candidate.sh
+* [ARROW-5696](https://issues.apache.org/jira/browse/ARROW-5696) - [Gandiva] [C++] Introduce castVarcharVarchar
+* [ARROW-5699](https://issues.apache.org/jira/browse/ARROW-5699) - [C++] Optimize parsing of Decimal128 in CSV
+* [ARROW-5701](https://issues.apache.org/jira/browse/ARROW-5701) - [C++][Gandiva] Build expressions only for the required selection vector types
+* [ARROW-5702](https://issues.apache.org/jira/browse/ARROW-5702) - [C++] parquet::arrow::FileReader::GetSchema()
+* [ARROW-5704](https://issues.apache.org/jira/browse/ARROW-5704) - [C++] Stop using ARROW\_TEMPLATE\_EXPORT for SparseTensorImpl class
+* [ARROW-5705](https://issues.apache.org/jira/browse/ARROW-5705) - [Java] Optimize BaseValueVector#computeCombinedBufferSize logic
+* [ARROW-5706](https://issues.apache.org/jira/browse/ARROW-5706) - [Java] Remove type conversion in getValidityBufferValueCapacity
+* [ARROW-5707](https://issues.apache.org/jira/browse/ARROW-5707) - [Java] Improve the performance and code structure for ArrowRecordBatch
+* [ARROW-5710](https://issues.apache.org/jira/browse/ARROW-5710) - [C++] Allow compiling Gandiva with Ninja on Windows
+* [ARROW-5718](https://issues.apache.org/jira/browse/ARROW-5718) - [R] auto splice data frames in record\_batch() and table()
+* [ARROW-5721](https://issues.apache.org/jira/browse/ARROW-5721) - [Rust] Move array related code into a separate module
+* [ARROW-5724](https://issues.apache.org/jira/browse/ARROW-5724) - [R] [CI] AppVeyor build should use ccache
+* [ARROW-5725](https://issues.apache.org/jira/browse/ARROW-5725) - [Crossbow] Port conda recipes to azure pipelines
+* [ARROW-5726](https://issues.apache.org/jira/browse/ARROW-5726) - [Java] Implement a common interface for int vectors
+* [ARROW-5727](https://issues.apache.org/jira/browse/ARROW-5727) - [Python] [CI] Install pytest-faulthandler before running tests
+* [ARROW-5748](https://issues.apache.org/jira/browse/ARROW-5748) - [Packaging][deb] Add support for Debian GNU/Linux buster
+* [ARROW-5749](https://issues.apache.org/jira/browse/ARROW-5749) - [Python] Add Python binding for Table::CombineChunks()
+* [ARROW-5751](https://issues.apache.org/jira/browse/ARROW-5751) - [Packaging][Python] Python macOS wheels have dynamic dependency on libcares
+* [ARROW-5752](https://issues.apache.org/jira/browse/ARROW-5752) - [Java] Improve the performance of ArrowBuf#setZero
+* [ARROW-5755](https://issues.apache.org/jira/browse/ARROW-5755) - [Rust] [Parquet] Add derived clone for Type
+* [ARROW-5768](https://issues.apache.org/jira/browse/ARROW-5768) - [Release] There are needless newlines at the end of CHANGELOG.md
+* [ARROW-5773](https://issues.apache.org/jira/browse/ARROW-5773) - [R] Clean up documentation before release
+* [ARROW-5780](https://issues.apache.org/jira/browse/ARROW-5780) - [C++] Add benchmark for Decimal128 operations
+* [ARROW-5782](https://issues.apache.org/jira/browse/ARROW-5782) - [Release] Setup test data for Flight in dev/release/01-perform.sh
+* [ARROW-5783](https://issues.apache.org/jira/browse/ARROW-5783) - [Release][C#] Exclude dummy.git from RAT check
+* [ARROW-5785](https://issues.apache.org/jira/browse/ARROW-5785) - Rust datafusion implementation should not depend on rustyline
+* [ARROW-5787](https://issues.apache.org/jira/browse/ARROW-5787) - [Release][Rust] Use local modules to verify RC
+* [ARROW-5793](https://issues.apache.org/jira/browse/ARROW-5793) - [Release] Avoid duplicate known host SSH error in dev/release/03-binary.sh
+* [ARROW-5794](https://issues.apache.org/jira/browse/ARROW-5794) - [Release] Skip uploading already uploaded binaries
+* [ARROW-5795](https://issues.apache.org/jira/browse/ARROW-5795) - [Release] Add missing waits on uploading binaries
+* [ARROW-5796](https://issues.apache.org/jira/browse/ARROW-5796) - [Release][APT] Update expected package list
+* [ARROW-5797](https://issues.apache.org/jira/browse/ARROW-5797) - [Release][APT] Update supported distributions
+* [ARROW-5813](https://issues.apache.org/jira/browse/ARROW-5813) - [C++] Support checking the equality of the different contiguous tensors
+* [ARROW-5818](https://issues.apache.org/jira/browse/ARROW-5818) - [Java][Gandiva] support varlen output vectors
+* [ARROW-5820](https://issues.apache.org/jira/browse/ARROW-5820) - [Release] Remove undefined variable check from verify script
+* [ARROW-653](https://issues.apache.org/jira/browse/ARROW-653) - [Python / C++] Add debugging function to print an array's buffer contents in hexadecimal
+* [ARROW-767](https://issues.apache.org/jira/browse/ARROW-767) - [C++] Adopt FileSystem abstraction
+* [ARROW-835](https://issues.apache.org/jira/browse/ARROW-835) - [Format] Add Timedelta type to describe time intervals
+* [ARROW-840](https://issues.apache.org/jira/browse/ARROW-840) - [Python] Provide Python API for creating user-defined data types that can survive Arrow IPC
+* [ARROW-973](https://issues.apache.org/jira/browse/ARROW-973) - [Website] Add FAQ page about project
+
+### Bug Fixes
+
+* [ARROW-1837](https://issues.apache.org/jira/browse/ARROW-1837) - [Java] Unable to read unsigned integers outside signed range for bit width in integration tests
+* [ARROW-2119](https://issues.apache.org/jira/browse/ARROW-2119) - [C++][Java] Handle Arrow stream with zero record batch
+* [ARROW-2136](https://issues.apache.org/jira/browse/ARROW-2136) - [Python] Non-nullable schema fields not checked in conversions from pandas
+* [ARROW-2256](https://issues.apache.org/jira/browse/ARROW-2256) - [C++] Fuzzer builds fail out of the box on Ubuntu 16.04 using LLVM apt repos
+* [ARROW-2461](https://issues.apache.org/jira/browse/ARROW-2461) - [Python] Build wheels for manylinux2010 tag
+* [ARROW-3344](https://issues.apache.org/jira/browse/ARROW-3344) - [Python] test\_plasma.py fails (in test\_plasma\_list)
+* [ARROW-3399](https://issues.apache.org/jira/browse/ARROW-3399) - [Python] Cannot serialize numpy matrix object
+* [ARROW-3650](https://issues.apache.org/jira/browse/ARROW-3650) - [Python] Mixed column indexes are read back as strings
+* [ARROW-3762](https://issues.apache.org/jira/browse/ARROW-3762) - [C++] Parquet arrow::Table reads error when overflowing capacity of BinaryArray
+* [ARROW-4021](https://issues.apache.org/jira/browse/ARROW-4021) - [Ruby] Error building red-arrow on msys2
+* [ARROW-4076](https://issues.apache.org/jira/browse/ARROW-4076) - [Python] schema validation and filters
+* [ARROW-4139](https://issues.apache.org/jira/browse/ARROW-4139) - [Python] Cast Parquet column statistics to unicode if UTF8 ConvertedType is set
+* [ARROW-4301](https://issues.apache.org/jira/browse/ARROW-4301) - [Java][Gandiva] Maven snapshot version update does not seem to update Gandiva submodule
+* [ARROW-4324](https://issues.apache.org/jira/browse/ARROW-4324) - [Python] Array dtype inference incorrect when created from list of mixed numpy scalars
+* [ARROW-4350](https://issues.apache.org/jira/browse/ARROW-4350) - [Python] dtype=object arrays cannot be converted to a list-of-list ListArray
+* [ARROW-4447](https://issues.apache.org/jira/browse/ARROW-4447) - [C++] Investigate dynamic linking for libthrift
+* [ARROW-4516](https://issues.apache.org/jira/browse/ARROW-4516) - [Python] Error while creating a ParquetDataset on a path without \`\_common\_dataset\` but with an empty \`\_tempfile\`
+* [ARROW-4523](https://issues.apache.org/jira/browse/ARROW-4523) - [JS] Add row proxy generation benchmark
+* [ARROW-4651](https://issues.apache.org/jira/browse/ARROW-4651) - [Format] Flight Location should be more flexible than a (host, port) pair
+* [ARROW-4675](https://issues.apache.org/jira/browse/ARROW-4675) - [Python] Error serializing bool ndarray in py2 and deserializing in py3
+* [ARROW-4694](https://issues.apache.org/jira/browse/ARROW-4694) - [CI] detect-changes.py is inconsistent
+* [ARROW-4723](https://issues.apache.org/jira/browse/ARROW-4723) - [Python] Skip \_files when reading a directory containing parquet files
+* [ARROW-4725](https://issues.apache.org/jira/browse/ARROW-4725) - [C++] Dictionary tests disabled under MinGW builds
+* [ARROW-4823](https://issues.apache.org/jira/browse/ARROW-4823) - [Python] read\_csv shouldn't close file handles it doesn't own
+* [ARROW-4845](https://issues.apache.org/jira/browse/ARROW-4845) - [R] Compiler warnings on Windows MingW64
+* [ARROW-4851](https://issues.apache.org/jira/browse/ARROW-4851) - [Java] BoundsChecking.java defaulting behavior for old drill parameter seems off
+* [ARROW-4885](https://issues.apache.org/jira/browse/ARROW-4885) - [Python] read\_csv() can't handle decimal128 columns
+* [ARROW-4886](https://issues.apache.org/jira/browse/ARROW-4886) - [Rust] Inconsistent behaviour with casting sliced primitive array to list array
+* [ARROW-4923](https://issues.apache.org/jira/browse/ARROW-4923) - Expose setters for Decimal vector that take long and double inputs
+* [ARROW-4934](https://issues.apache.org/jira/browse/ARROW-4934) - [Python] Address deprecation notice that will be a bug in Python 3.8
+* [ARROW-5019](https://issues.apache.org/jira/browse/ARROW-5019) - [C#] ArrowStreamWriter doesn't work on a non-seekable stream
+* [ARROW-5049](https://issues.apache.org/jira/browse/ARROW-5049) - [Python] org/apache/hadoop/fs/FileSystem class not found when pyarrow FileSystem used in spark
+* [ARROW-5051](https://issues.apache.org/jira/browse/ARROW-5051) - [GLib][Gandiva] Test failure in release verification script
+* [ARROW-5058](https://issues.apache.org/jira/browse/ARROW-5058) - [Release] 02-source.sh generates e-mail template with wrong links
+* [ARROW-5068](https://issues.apache.org/jira/browse/ARROW-5068) - [Gandiva][Packaging] Fix gandiva nightly builds after the CMake refactor
+* [ARROW-5090](https://issues.apache.org/jira/browse/ARROW-5090) - Parquet linking fails on macOS due to @rpath in dylib
+* [ARROW-5092](https://issues.apache.org/jira/browse/ARROW-5092) - [C#] Source Link doesn't work with the C# release script
+* [ARROW-5095](https://issues.apache.org/jira/browse/ARROW-5095) - [Flight][C++] Flight DoGet doesn't expose server error message
+* [ARROW-5096](https://issues.apache.org/jira/browse/ARROW-5096) - [Packaging][deb] plasma-store-server packages are missing
+* [ARROW-5097](https://issues.apache.org/jira/browse/ARROW-5097) - [Packaging][CentOS6] arrow-lib has unresolvable dependencies
+* [ARROW-5098](https://issues.apache.org/jira/browse/ARROW-5098) - [Website] Update APT install document for 0.13.0
+* [ARROW-5100](https://issues.apache.org/jira/browse/ARROW-5100) - [JS] Writer swaps byte order if buffers share the same underlying ArrayBuffer
+* [ARROW-5117](https://issues.apache.org/jira/browse/ARROW-5117) - [Go] Panic when appending zero slices after initializing a builder
+* [ARROW-5119](https://issues.apache.org/jira/browse/ARROW-5119) - [Go] invalid Stringer implementation for array.Boolean
+* [ARROW-5129](https://issues.apache.org/jira/browse/ARROW-5129) - [Rust][Parquet] Column writer bug: check dictionary encoder when adding a new data page
+* [ARROW-5130](https://issues.apache.org/jira/browse/ARROW-5130) - [Python] Segfault when importing TensorFlow after Pyarrow
+* [ARROW-5132](https://issues.apache.org/jira/browse/ARROW-5132) - [Java] Errors on building gandiva\_jni.dll on Windows with Visual Studio 2017
+* [ARROW-5138](https://issues.apache.org/jira/browse/ARROW-5138) - [Python/C++] Row group retrieval doesn't restore index properly
+* [ARROW-5142](https://issues.apache.org/jira/browse/ARROW-5142) - [CI] Fix conda calls in AppVeyor scripts
+* [ARROW-5144](https://issues.apache.org/jira/browse/ARROW-5144) - [Python] ParquetDataset and ParquetPiece not serializable
+* [ARROW-5146](https://issues.apache.org/jira/browse/ARROW-5146) - [Dev] Merge script imposes directory name
+* [ARROW-5147](https://issues.apache.org/jira/browse/ARROW-5147) - [C++] get an error in building: Could NOT find DoubleConversion
+* [ARROW-5148](https://issues.apache.org/jira/browse/ARROW-5148) - [CI] [C++] LLVM-related compile errors
+* [ARROW-5149](https://issues.apache.org/jira/browse/ARROW-5149) - [Packaging][Wheel] Pin LLVM to version 7 in windows builds
+* [ARROW-5152](https://issues.apache.org/jira/browse/ARROW-5152) - [Python] CMake warnings when building
+* [ARROW-5159](https://issues.apache.org/jira/browse/ARROW-5159) - Unable to build benches in arrow crate.
+* [ARROW-5160](https://issues.apache.org/jira/browse/ARROW-5160) - [C++] ABORT\_NOT\_OK evaluates expression twice
+* [ARROW-5166](https://issues.apache.org/jira/browse/ARROW-5166) - [Python][Parquet] Statistics for uint64 columns may overflow
+* [ARROW-5167](https://issues.apache.org/jira/browse/ARROW-5167) - [C++] Upgrade string-view-light to latest
+* [ARROW-5169](https://issues.apache.org/jira/browse/ARROW-5169) - [Python] non-nullable fields are converted to nullable in Table.from\_pandas
+* [ARROW-5173](https://issues.apache.org/jira/browse/ARROW-5173) - [Go] handle multiple concatenated streams back-to-back
+* [ARROW-5174](https://issues.apache.org/jira/browse/ARROW-5174) - [Go] implement Stringer for DataTypes
+* [ARROW-5177](https://issues.apache.org/jira/browse/ARROW-5177) - [Python] ParquetReader.read\_column() doesn't check bounds
+* [ARROW-5183](https://issues.apache.org/jira/browse/ARROW-5183) - [CI] MinGW build failures on AppVeyor
+* [ARROW-5184](https://issues.apache.org/jira/browse/ARROW-5184) - [Rust] Broken links and other documentation warnings
+* [ARROW-5194](https://issues.apache.org/jira/browse/ARROW-5194) - [C++][Plasma] TEST(PlasmaSerialization, GetReply) is failing
+* [ARROW-5195](https://issues.apache.org/jira/browse/ARROW-5195) - [Python] read\_csv ignores null\_values on string types
+* [ARROW-5201](https://issues.apache.org/jira/browse/ARROW-5201) - [Python] Import ABCs from collections is deprecated in Python 3.7
+* [ARROW-5208](https://issues.apache.org/jira/browse/ARROW-5208) - [Python] Inconsistent resulting type during casting in pa.array() when mask is present
+* [ARROW-5214](https://issues.apache.org/jira/browse/ARROW-5214) - [C++] Offline dependency downloader misses some libraries
+* [ARROW-5217](https://issues.apache.org/jira/browse/ARROW-5217) - [Rust] [CI] DataFusion test failure
+* [ARROW-5232](https://issues.apache.org/jira/browse/ARROW-5232) - [Java] value vector size increases rapidly in case of clear/setSafe loop
+* [ARROW-5233](https://issues.apache.org/jira/browse/ARROW-5233) - [Go] migrate to new flatbuffers-v1.11.0
+* [ARROW-5237](https://issues.apache.org/jira/browse/ARROW-5237) - [Python] pandas\_version key in pandas metadata no longer populated
+* [ARROW-5240](https://issues.apache.org/jira/browse/ARROW-5240) - [C++][CI] cmake\_format 0.5.0 appears to fail the build
+* [ARROW-5242](https://issues.apache.org/jira/browse/ARROW-5242) - [C++] Arrow doesn't compile cleanly with Visual Studio 2017 Update 9 or later due to narrowing
+* [ARROW-5243](https://issues.apache.org/jira/browse/ARROW-5243) - [Java][Gandiva] Add test for decimal compare functions
+* [ARROW-5245](https://issues.apache.org/jira/browse/ARROW-5245) - [C++][CI] Unpin cmake\_format
+* [ARROW-5246](https://issues.apache.org/jira/browse/ARROW-5246) - [Go] use Go-1.12 in CI
+* [ARROW-5249](https://issues.apache.org/jira/browse/ARROW-5249) - [Java] Flight client doesn't handle auth correctly in some cases
+* [ARROW-5253](https://issues.apache.org/jira/browse/ARROW-5253) - [C++] external Snappy fails on Alpine
+* [ARROW-5254](https://issues.apache.org/jira/browse/ARROW-5254) - [Flight][Java] DoAction does not support result streams
+* [ARROW-5255](https://issues.apache.org/jira/browse/ARROW-5255) - [Java] Implement user-defined data types API
+* [ARROW-5260](https://issues.apache.org/jira/browse/ARROW-5260) - [Python][C++] Crash when deserializing from components in a fresh new process
+* [ARROW-5274](https://issues.apache.org/jira/browse/ARROW-5274) - [JavaScript] Wrong array type for countBy
+* [ARROW-5285](https://issues.apache.org/jira/browse/ARROW-5285) - [C++][Plasma] GpuProcessHandle is not released when GPU object deleted
+* [ARROW-5293](https://issues.apache.org/jira/browse/ARROW-5293) - [C++] Take kernel on DictionaryArray does not preserve ordered flag
+* [ARROW-5294](https://issues.apache.org/jira/browse/ARROW-5294) - [CI] setuptools\_scm failures
+* [ARROW-5296](https://issues.apache.org/jira/browse/ARROW-5296) - [Java] Sporadic Flight test failures
+* [ARROW-5301](https://issues.apache.org/jira/browse/ARROW-5301) - [Python] parquet documentation outdated on nthreads argument
+* [ARROW-5306](https://issues.apache.org/jira/browse/ARROW-5306) - [CI] [GLib] Disable GTK-Doc
+* [ARROW-5308](https://issues.apache.org/jira/browse/ARROW-5308) - [Go] remove deprecated Feather format
+* [ARROW-5314](https://issues.apache.org/jira/browse/ARROW-5314) - [Go] Incorrect Printing for String Arrays with Offsets
+* [ARROW-5325](https://issues.apache.org/jira/browse/ARROW-5325) - [Archery][Benchmark] Output properly formatted jsonlines from benchmark diff cli command
+* [ARROW-5330](https://issues.apache.org/jira/browse/ARROW-5330) - [Python] [CI] Run Python Flight tests on Travis-CI
+* [ARROW-5332](https://issues.apache.org/jira/browse/ARROW-5332) - [R] R package fails to build/install: error in dyn.load()
+* [ARROW-5348](https://issues.apache.org/jira/browse/ARROW-5348) - [CI] [Java] Gandiva checkstyle failure
+* [ARROW-5360](https://issues.apache.org/jira/browse/ARROW-5360) - [Rust] Builds are broken by rustyline on nightly 2019-05-16+
+* [ARROW-5362](https://issues.apache.org/jira/browse/ARROW-5362) - [C++] Compression round trip test can cause some sanitizers to fail
+* [ARROW-5371](https://issues.apache.org/jira/browse/ARROW-5371) - [Release] Add tests for dev/release/00-prepare.sh
+* [ARROW-5373](https://issues.apache.org/jira/browse/ARROW-5373) - [Java] Add missing details for Gandiva Java Build
+* [ARROW-5376](https://issues.apache.org/jira/browse/ARROW-5376) - [C++] Compile failure on gcc 5.4.0
+* [ARROW-5383](https://issues.apache.org/jira/browse/ARROW-5383) - [Go] update IPC flatbuf (new Duration type)
+* [ARROW-5387](https://issues.apache.org/jira/browse/ARROW-5387) - [Go] properly handle sub-slice of List
+* [ARROW-5388](https://issues.apache.org/jira/browse/ARROW-5388) - [Go] use arrow.TypeEqual in array.NewChunked
+* [ARROW-5390](https://issues.apache.org/jira/browse/ARROW-5390) - [CI] Job time limit exceeded on Travis
+* [ARROW-5397](https://issues.apache.org/jira/browse/ARROW-5397) - Test Flight TLS support
+* [ARROW-5398](https://issues.apache.org/jira/browse/ARROW-5398) - [Python] Flight tests broken by URI changes
+* [ARROW-5403](https://issues.apache.org/jira/browse/ARROW-5403) - [C++] Test failures not propagated in Windows shared builds
+* [ARROW-5411](https://issues.apache.org/jira/browse/ARROW-5411) - [C++][Python] Build error building on Mac OS Mojave
+* [ARROW-5412](https://issues.apache.org/jira/browse/ARROW-5412) - [Java] Integration test fails with UnsupportedOperationException
+* [ARROW-5419](https://issues.apache.org/jira/browse/ARROW-5419) - [C++] CSV strings\_can\_be\_null option doesn't respect all null\_values
+* [ARROW-5421](https://issues.apache.org/jira/browse/ARROW-5421) - [Packaging][Crossbow] Duplicated key in nightly test configuration
+* [ARROW-5430](https://issues.apache.org/jira/browse/ARROW-5430) - [Python] Can read but not write parquet partitioned on large ints
+* [ARROW-5435](https://issues.apache.org/jira/browse/ARROW-5435) - [Java] add test for IntervalYearVector#getAsStringBuilder
+* [ARROW-5437](https://issues.apache.org/jira/browse/ARROW-5437) - [Python] Missing pandas pytest marker from parquet tests
+* [ARROW-5446](https://issues.apache.org/jira/browse/ARROW-5446) - [C++] Use cmake header install directory instead of include
+* [ARROW-5448](https://issues.apache.org/jira/browse/ARROW-5448) - [CI] MinGW build failures on AppVeyor
+* [ARROW-5453](https://issues.apache.org/jira/browse/ARROW-5453) - [C++] Just-released cmake-format 0.5.2 breaks the build
+* [ARROW-5455](https://issues.apache.org/jira/browse/ARROW-5455) - [Rust] Build broken by 2019-05-30 Rust nightly
+* [ARROW-5456](https://issues.apache.org/jira/browse/ARROW-5456) - [GLib][Plasma] Installed plasma-glib may be used on building document
+* [ARROW-5457](https://issues.apache.org/jira/browse/ARROW-5457) - [GLib][Plasma] Environment variable name for test is wrong
+* [ARROW-5459](https://issues.apache.org/jira/browse/ARROW-5459) - [Go] implement Stringer for Float16 DataType
+* [ARROW-5462](https://issues.apache.org/jira/browse/ARROW-5462) - [Go] support writing zero-length List
+* [ARROW-5479](https://issues.apache.org/jira/browse/ARROW-5479) - [Rust] [DataFusion] Use ARROW\_TEST\_DATA instead of relative path for testing
+* [ARROW-5487](https://issues.apache.org/jira/browse/ARROW-5487) - [CI] [Python] Failure in docs build
+* [ARROW-5493](https://issues.apache.org/jira/browse/ARROW-5493) - [Integration/Go] add Go support for IPC integration tests
+* [ARROW-5507](https://issues.apache.org/jira/browse/ARROW-5507) - [Plasma] [CUDA] Compile error
+* [ARROW-5514](https://issues.apache.org/jira/browse/ARROW-5514) - [C++] Printer for uint64 shows wrong values
+* [ARROW-5517](https://issues.apache.org/jira/browse/ARROW-5517) - [C++] Header collection CMake logic should only consider filename without directory included
+* [ARROW-5520](https://issues.apache.org/jira/browse/ARROW-5520) - [C++][Packaging] No NVidia CUDA toolkit on AArch64
+* [ARROW-5521](https://issues.apache.org/jira/browse/ARROW-5521) - [Packaging] License check fails with Apache RAT 0.13
+* [ARROW-5528](https://issues.apache.org/jira/browse/ARROW-5528) - Concatenate() crashes when concatenating empty binary arrays.
+* [ARROW-5532](https://issues.apache.org/jira/browse/ARROW-5532) - [JS] Field Metadata Not Read
+* [ARROW-5551](https://issues.apache.org/jira/browse/ARROW-5551) - [Go] invalid FixedSizeArray representation
+* [ARROW-5553](https://issues.apache.org/jira/browse/ARROW-5553) - [Ruby] red-arrow gem does not compile on ruby:2.5 docker image
+* [ARROW-5576](https://issues.apache.org/jira/browse/ARROW-5576) - [C++] Flaky thrift\_ep tarball downloads
+* [ARROW-5577](https://issues.apache.org/jira/browse/ARROW-5577) - [C++] Link failure due to googletest shared library on Alpine Linux
+* [ARROW-5583](https://issues.apache.org/jira/browse/ARROW-5583) - [Java] When the isSet of a NullableValueHolder is 0, the buffer field should not be used
+* [ARROW-5584](https://issues.apache.org/jira/browse/ARROW-5584) - [Java] Add import for link reference in FieldReader javadoc
+* [ARROW-5589](https://issues.apache.org/jira/browse/ARROW-5589) - [C++][Fuzzing] arrow-ipc-fuzzing-test crash 2354085db0125113f04f7bd23f54b85cca104713
+* [ARROW-5592](https://issues.apache.org/jira/browse/ARROW-5592) - [Go] implement Duration array
+* [ARROW-5596](https://issues.apache.org/jira/browse/ARROW-5596) - [Python] Flight tests failing on Python 2.7
+* [ARROW-5601](https://issues.apache.org/jira/browse/ARROW-5601) - [gandiva] Error when projector with a string field
+* [ARROW-5603](https://issues.apache.org/jira/browse/ARROW-5603) - [Python] register pytest markers to avoid warnings
+* [ARROW-5605](https://issues.apache.org/jira/browse/ARROW-5605) - [C++][Fuzzing] arrow-ipc-fuzzing-test crash 74aec871d14bb6b07c72ea8f0e8c9f72cbe6b73c
+* [ARROW-5606](https://issues.apache.org/jira/browse/ARROW-5606) - [Python] pandas.RangeIndex.\_start/\_stop/\_step are deprecated
+* [ARROW-5608](https://issues.apache.org/jira/browse/ARROW-5608) - [C++][parquet] Invalid memory access when using parquet::arrow::ColumnReader
+* [ARROW-5615](https://issues.apache.org/jira/browse/ARROW-5615) - [C++] Compilation error due to C++11 string literals on gcc 5.4.0 Ubuntu 16.04
+* [ARROW-5616](https://issues.apache.org/jira/browse/ARROW-5616) - [Python] C++ build failure against Python 2.7 headers
+* [ARROW-5617](https://issues.apache.org/jira/browse/ARROW-5617) - [C++] thrift\_ep 0.12.0 fails to build when using ARROW\_BOOST\_VENDORED=ON
+* [ARROW-5619](https://issues.apache.org/jira/browse/ARROW-5619) - [C++] get\_apache\_mirror.py doesn't work with Python 3.5
+* [ARROW-5623](https://issues.apache.org/jira/browse/ARROW-5623) - [CI][GLib] Failed on macOS
+* [ARROW-5624](https://issues.apache.org/jira/browse/ARROW-5624) - [C++] -Duriparser\_SOURCE=BUNDLED is broken
+* [ARROW-5626](https://issues.apache.org/jira/browse/ARROW-5626) - [C++][Gandiva] Expression cache should consider precision and scale too
+* [ARROW-5629](https://issues.apache.org/jira/browse/ARROW-5629) - [C++] Fix Coverity issues
+* [ARROW-5631](https://issues.apache.org/jira/browse/ARROW-5631) - [C++] CMake 3.2 build is broken
+* [ARROW-5648](https://issues.apache.org/jira/browse/ARROW-5648) - [C++] Build fails on mingw without codecvt
+* [ARROW-5654](https://issues.apache.org/jira/browse/ARROW-5654) - [C++] ChunkedArray should validate the types of the arrays
+* [ARROW-5674](https://issues.apache.org/jira/browse/ARROW-5674) - [Python] Missing pandas pytest markers from test\_parquet.py
+* [ARROW-5675](https://issues.apache.org/jira/browse/ARROW-5675) - [Doc] Fix typo in documentation describing compile/debug workflow on macOS with Xcode IDE
+* [ARROW-5678](https://issues.apache.org/jira/browse/ARROW-5678) - [R][Lint] Fix hadolint docker linting error
+* [ARROW-5693](https://issues.apache.org/jira/browse/ARROW-5693) - [Go] skip IPC integration test for Decimal128
+* [ARROW-5697](https://issues.apache.org/jira/browse/ARROW-5697) - [GLib] c\_glib/Dockerfile is broken
+* [ARROW-5698](https://issues.apache.org/jira/browse/ARROW-5698) - [R] r/Dockerfile docker-compose build is broken
+* [ARROW-5709](https://issues.apache.org/jira/browse/ARROW-5709) - [C++] gandiva-date\_time\_test failure on Windows
+* [ARROW-5714](https://issues.apache.org/jira/browse/ARROW-5714) - [JS] Inconsistent behavior in Int64Builder with/without BigNum
+* [ARROW-5723](https://issues.apache.org/jira/browse/ARROW-5723) - [Gandiva][Crossbow] Builds failing
+* [ARROW-5728](https://issues.apache.org/jira/browse/ARROW-5728) - [Python] [CI] Travis-CI failures in test\_jvm.py
+* [ARROW-5730](https://issues.apache.org/jira/browse/ARROW-5730) - [Python][CI] Selectively skip test cases in the dask integration test
+* [ARROW-5732](https://issues.apache.org/jira/browse/ARROW-5732) - [C++] macOS builds failing idiosyncratically on master with warnings from pmmintrin.h
+* [ARROW-5735](https://issues.apache.org/jira/browse/ARROW-5735) - [C++] Appveyor builds failing persistently in thrift\_ep build
+* [ARROW-5737](https://issues.apache.org/jira/browse/ARROW-5737) - [C++][Gandiva] Gandiva not building in manylinux
+* [ARROW-5738](https://issues.apache.org/jira/browse/ARROW-5738) - [Crossbow][Conda] OSX package builds are failing with missing intrinsics
+* [ARROW-5739](https://issues.apache.org/jira/browse/ARROW-5739) - [CI] Fix docker python build
+* [ARROW-5750](https://issues.apache.org/jira/browse/ARROW-5750) - [Java] Java compilation failures on master
+* [ARROW-5754](https://issues.apache.org/jira/browse/ARROW-5754) - [C++] Missing override for ~GrpcStreamWriter?
+* [ARROW-5765](https://issues.apache.org/jira/browse/ARROW-5765) - [C++] TestDictionary.Validate test is crashed with release build
+* [ARROW-5769](https://issues.apache.org/jira/browse/ARROW-5769) - [Java] org.apache.arrow.flight.TestTls fails via dev/release/00-prepare.sh
+* [ARROW-5770](https://issues.apache.org/jira/browse/ARROW-5770) - [C++] Fix -Wpessimizing-move in result.h
+* [ARROW-5771](https://issues.apache.org/jira/browse/ARROW-5771) - [Python] Docker python-nopandas job fails
+* [ARROW-5774](https://issues.apache.org/jira/browse/ARROW-5774) - [Java][Documentation] Document the need to checkout git submodules for flight
+* [ARROW-5781](https://issues.apache.org/jira/browse/ARROW-5781) - [Archery] Ensure benchmark clone accepts remotes in revision
+* [ARROW-5791](https://issues.apache.org/jira/browse/ARROW-5791) - [Python] pyarrow.csv.read\_csv hangs + eats all RAM
+* [ARROW-5816](https://issues.apache.org/jira/browse/ARROW-5816) - [Release] Parallel curl does not work reliably in verify-release-candidate.sh
+* [ARROW-5824](https://issues.apache.org/jira/browse/ARROW-5824) - [Gandiva] [C++] Fix decimal null
+* [ARROW-61](https://issues.apache.org/jira/browse/ARROW-61) - [Java] Method can return the value bigger than long MAX\_VALUE
+
+
+[1]: https://www.apache.org/dyn/closer.cgi/arrow/arrow-0.14.0/
+[2]: https://bintray.com/apache/arrow/centos/0.14.0/
+[3]: https://bintray.com/apache/arrow/debian/0.14.0/
+[4]: https://bintray.com/apache/arrow/python/0.14.0/
+[5]: https://bintray.com/apache/arrow/ubuntu/0.14.0/
+[6]: https://github.com/apache/arrow/releases/tag/apache-arrow-0.14.0
diff --git a/site/_release/index.md b/site/_release/index.md
index 61181ef0da5..c2f60c9737d 100644
--- a/site/_release/index.md
+++ b/site/_release/index.md
@@ -26,36 +26,38 @@ limitations under the License.
 Navigate to the release page for downloads and the changelog.
-* [0.13.0 (1 April 2019)][1]
-* [0.12.0 (20 January 2019)][2]
-* [0.11.1 (19 October 2018)][3]
-* [0.11.0 (8 October 2018)][4]
-* [0.10.0 (6 August 2018)][5]
-* [0.9.0 (21 March 2018)][6]
-* [0.8.0 (18 December 2017)][7]
-* [0.7.1 (1 October 2017)][8]
-* [0.7.0 (17 September 2017)][9]
-* [0.6.0 (14 August 2017)][10]
-* [0.5.0 (23 July 2017)][11]
-* [0.4.1 (9 June 2017)][12]
-* [0.4.0 (22 May 2017)][13]
-* [0.3.0 (5 May 2017)][14]
-* [0.2.0 (18 February 2017)][15]
-* [0.1.0 (10 October 2016)][16]
+* [0.14.0 (4 July 2019)][1]
+* [0.13.0 (1 April 2019)][2]
+* [0.12.0 (20 January 2019)][3]
+* [0.11.1 (19 October 2018)][4]
+* [0.11.0 (8 October 2018)][5]
+* [0.10.0 (6 August 2018)][6]
+* [0.9.0 (21 March 2018)][7]
+* [0.8.0 (18 December 2017)][8]
+* [0.7.1 (1 October 2017)][9]
+* [0.7.0 (17 September 2017)][10]
+* [0.6.0 (14 August 2017)][11]
+* [0.5.0 (23 July 2017)][12]
+* [0.4.1 (9 June 2017)][13]
+* [0.4.0 (22 May 2017)][14]
+* [0.3.0 (5 May 2017)][15]
+* [0.2.0 (18 February 2017)][16]
+* [0.1.0 (10 October 2016)][17]

-[1]: {{ site.baseurl }}/release/0.13.0.html
-[2]: {{ site.baseurl }}/release/0.12.0.html
-[3]: {{ site.baseurl }}/release/0.11.1.html
-[4]: {{ site.baseurl }}/release/0.11.0.html
-[5]: {{ site.baseurl }}/release/0.10.0.html
-[6]: {{ site.baseurl }}/release/0.9.0.html
-[7]: {{ site.baseurl }}/release/0.8.0.html
-[8]: {{ site.baseurl }}/release/0.7.1.html
-[9]: {{ site.baseurl }}/release/0.7.0.html
-[10]: {{ site.baseurl }}/release/0.6.0.html
-[11]: {{ site.baseurl }}/release/0.5.0.html
-[12]: {{ site.baseurl }}/release/0.4.1.html
-[13]: {{ site.baseurl }}/release/0.4.0.html
-[14]: {{ site.baseurl }}/release/0.3.0.html
-[15]: {{ site.baseurl }}/release/0.2.0.html
-[16]: {{ site.baseurl }}/release/0.1.0.html
+[1]: {{ site.baseurl }}/release/0.14.0.html
+[2]: {{ site.baseurl }}/release/0.13.0.html
+[3]: {{ site.baseurl }}/release/0.12.0.html
+[4]: {{ site.baseurl }}/release/0.11.1.html
+[5]: {{ site.baseurl }}/release/0.11.0.html
+[6]: {{ site.baseurl }}/release/0.10.0.html
+[7]: {{ site.baseurl }}/release/0.9.0.html
+[8]: {{ site.baseurl }}/release/0.8.0.html
+[9]: {{ site.baseurl }}/release/0.7.1.html
+[10]: {{ site.baseurl }}/release/0.7.0.html
+[11]: {{ site.baseurl }}/release/0.6.0.html
+[12]: {{ site.baseurl }}/release/0.5.0.html
+[13]: {{ site.baseurl }}/release/0.4.1.html
+[14]: {{ site.baseurl }}/release/0.4.0.html
+[15]: {{ site.baseurl }}/release/0.3.0.html
+[16]: {{ site.baseurl }}/release/0.2.0.html
+[17]: {{ site.baseurl }}/release/0.1.0.html
diff --git a/site/index.html b/site/index.html
index 4d5995ac54a..4aab88e5409 100644
--- a/site/index.html
+++ b/site/index.html
@@ -23,7 +23,7 @@
     Fast

-    Apache Arrow™ enables execution engines to take advantage of the latest SIMD (Single input multiple data) operations included in modern processors, for native vectorized optimization of analytical data processing. Columnar layout is optimized for data locality for better performance on modern hardware like CPUs and GPUs.
+    Apache Arrow™ enables execution engines to take advantage of the latest SIMD (Single instruction, multiple data) operations included in modern processors, for native vectorized optimization of analytical data processing. Columnar layout is optimized for data locality for better performance on modern hardware like CPUs and GPUs.

     The Arrow memory format supports zero-copy reads for lightning-fast data access without serialization overhead.
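As a concrete illustration of the zero-copy claim in the hunk above (not part of the site change itself), here is a minimal pyarrow sketch; the `demo.arrow` path is hypothetical, and the calls are standard pyarrow IPC and memory-mapping APIs. Record batches read from a memory-mapped Arrow file wrap the mapped pages directly, so no bytes are copied and no deserialization step runs.

```python
import pyarrow as pa

# Write a small table to an Arrow IPC file. "demo.arrow" is a
# hypothetical path used only for this sketch.
table = pa.Table.from_pydict({"x": [1, 2, 3], "y": ["a", "b", "c"]})
sink = pa.OSFile("demo.arrow", "wb")
writer = pa.RecordBatchFileWriter(sink, table.schema)
writer.write_table(table)
writer.close()
sink.close()

# Memory-map the same file and read it back. The reconstructed arrays
# reference the mapped pages directly -- this is the "zero-copy read"
# the landing-page copy refers to.
source = pa.memory_map("demo.arrow")
loaded = pa.RecordBatchFileReader(source).read_all()
print(loaded.column(0))
```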

diff --git a/site/powered_by.md b/site/powered_by.md
index cdbede25ac3..e1a474c68eb 100644
--- a/site/powered_by.md
+++ b/site/powered_by.md
@@ -70,11 +70,11 @@ short description of your use case.
   Dremio reads data from any source (RDBMS, HDFS, S3, NoSQL) into Arrow
   buffers, and provides fast SQL access via ODBC, JDBC, and REST for BI,
   Python, R, and more (all backed by Apache Arrow).
-* **[Fletcher][20]:** Fletcher is an FPGA acceleration framework that can
-  convert an Arrow schema into an easy-to-use hardware interface. The
-  accelerator can request data from Arrow tables by supplying row indices.
-  In turn, the interface provides streams of data of the types defined
-  through the schema. Furthermore, Arrow alleviates serialization bottlenecks.
+* **[Fletcher][20]:** Fletcher is a framework that can integrate FPGA
+  accelerators with tools and frameworks that use the Apache Arrow in-memory
+  format. From a set of Arrow Schemas, Fletcher generates highly optimized
+  hardware structures that allow accelerator kernels to read and write
+  RecordBatches at system bandwidth through easy-to-use interfaces.
 * **[GeoMesa][8]:** A suite of tools that enables large-scale geospatial query
   and analytics on distributed computing systems. GeoMesa supports query
   results in the Arrow IPC format, which can then be used for in-browser
@@ -163,7 +163,7 @@ short description of your use case.
 [17]: https://github.com/red-data-tools/red-arrow/
 [18]: https://www.graphistry.com
 [19]: http://gpuopenanalytics.com
-[20]: https://github.com/johanpel/fletcher
+[20]: https://github.com/abs-tudelft/fletcher
 [21]: https://www.paradigm4.com
 [22]: https://github.com/Paradigm4/stream
 [23]: https://github.com/jpmorganchase/perspective