diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc
index 0781dd4a2df..900e8d2b38f 100644
--- a/cpp/src/arrow/array/array_base.cc
+++ b/cpp/src/arrow/array/array_base.cc
@@ -161,7 +161,13 @@ struct ScalarFromArraySlotImpl {
     }
 
     if (array_.IsNull(index_)) {
-      return MakeNullScalar(array_.type());
+      auto null = MakeNullScalar(array_.type());
+      if (is_dictionary(array_.type()->id())) {
+        auto& dict_null = checked_cast<DictionaryScalar&>(*null);
+        const auto& dict_array = checked_cast<const DictionaryArray&>(array_);
+        dict_null.value.dictionary = dict_array.dictionary();
+      }
+      return null;
     }
 
     RETURN_NOT_OK(VisitArrayInline(array_, this));
diff --git a/cpp/src/arrow/array/array_binary_test.cc b/cpp/src/arrow/array/array_binary_test.cc
index 9c2cd888692..af732eab068 100644
--- a/cpp/src/arrow/array/array_binary_test.cc
+++ b/cpp/src/arrow/array/array_binary_test.cc
@@ -570,6 +570,26 @@ class TestStringBuilder : public TestBuilder {
     ASSERT_EQ(reps * 40, result_->value_data()->size());
   }
 
+  void TestOverflowCheck() {
+    auto max_size = builder_->memory_limit();
+
+    ASSERT_OK(builder_->ValidateOverflow(1));
+    ASSERT_OK(builder_->ValidateOverflow(max_size));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_size + 1));
+
+    ASSERT_OK(builder_->Append("bb"));
+    ASSERT_OK(builder_->ValidateOverflow(max_size - 2));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_size - 1));
+
+    ASSERT_OK(builder_->AppendNull());
+    ASSERT_OK(builder_->ValidateOverflow(max_size - 2));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_size - 1));
+
+    ASSERT_OK(builder_->Append("ccc"));
+    ASSERT_OK(builder_->ValidateOverflow(max_size - 5));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_size - 4));
+  }
+
   void TestZeroLength() {
     // All buffers are null
     Done();
@@ -602,6 +622,8 @@ TYPED_TEST(TestStringBuilder, TestCapacityReserve) { this->TestCapacityReserve()
 
 TYPED_TEST(TestStringBuilder, TestZeroLength) { this->TestZeroLength(); }
 
+TYPED_TEST(TestStringBuilder, TestOverflowCheck) { this->TestOverflowCheck(); }
+
 // ----------------------------------------------------------------------
 // ChunkedBinaryBuilder tests
 
diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc
index df0eb522cf4..0dff0f48b00 100644
--- a/cpp/src/arrow/array/array_list_test.cc
+++ b/cpp/src/arrow/array/array_list_test.cc
@@ -467,6 +467,32 @@ class TestListArray : public TestBuilder {
     AssertArraysEqual(*result_, *expected);
   }
 
+  void TestOverflowCheck() {
+    Int16Builder* vb = checked_cast<Int16Builder*>(builder_->value_builder());
+    auto max_elements = builder_->maximum_elements();
+
+    ASSERT_OK(builder_->ValidateOverflow(1));
+    ASSERT_OK(builder_->ValidateOverflow(max_elements));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements + 1));
+
+    ASSERT_OK(builder_->Append());
+    ASSERT_OK(vb->Append(1));
+    ASSERT_OK(vb->Append(2));
+    ASSERT_OK(builder_->ValidateOverflow(max_elements - 2));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements - 1));
+
+    ASSERT_OK(builder_->AppendNull());
+    ASSERT_OK(builder_->ValidateOverflow(max_elements - 2));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements - 1));
+
+    ASSERT_OK(builder_->Append());
+    ASSERT_OK(vb->Append(1));
+    ASSERT_OK(vb->Append(2));
+    ASSERT_OK(vb->Append(3));
+    ASSERT_OK(builder_->ValidateOverflow(max_elements - 5));
+    ASSERT_RAISES(CapacityError, builder_->ValidateOverflow(max_elements - 4));
+  }
+
  protected:
   std::shared_ptr<DataType> value_type_;
 
@@ -508,6 +534,12 @@ TYPED_TEST(TestListArray, ValidateOffsets) { this->TestValidateOffsets(); }
 
 TYPED_TEST(TestListArray, CornerCases) { this->TestCornerCases(); }
 
+#ifndef ARROW_LARGE_MEMORY_TESTS
+TYPED_TEST(TestListArray, DISABLED_TestOverflowCheck) { this->TestOverflowCheck(); }
+#else
+TYPED_TEST(TestListArray, TestOverflowCheck) { this->TestOverflowCheck(); }
+#endif
+
 // ----------------------------------------------------------------------
 // Map tests
 
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index 6f015dda3e1..b92cc285894 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -99,6 +99,12 @@ Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
   return Status::OK();
 }
 
+Result<std::shared_ptr<Array>> ArrayBuilder::Finish() {
+  std::shared_ptr<Array> out;
+  RETURN_NOT_OK(Finish(&out));
+  return out;
+}
+
 void ArrayBuilder::Reset() {
   capacity_ = length_ = null_count_ = 0;
   null_bitmap_builder_.Reset();
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 33b1f9b3e66..d73681756ba 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -56,6 +56,8 @@ class ARROW_EXPORT ArrayBuilder {
   /// skip shared pointers and just return a raw pointer
   ArrayBuilder* child(int i) { return children_[i].get(); }
 
+  const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
+
   int num_children() const { return static_cast<int>(children_.size()); }
 
   virtual int64_t length() const { return length_; }
@@ -118,6 +120,13 @@ class ARROW_EXPORT ArrayBuilder {
   /// \return Status
   Status Finish(std::shared_ptr<Array>* out);
 
+  /// \brief Return result of builder as an Array object.
+  ///
+  /// The builder is reset except for DictionaryBuilder.
+  ///
+  /// \return The finalized Array object
+  Result<std::shared_ptr<Array>> Finish();
+
   /// \brief Return the type of the built Array
   virtual std::shared_ptr<DataType> type() const = 0;
 
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index 593b533a19c..21993ce5493 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -78,9 +78,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
 
   Status AppendNulls(int64_t length) final {
     const int64_t num_bytes = value_data_builder_.length();
-    if (ARROW_PREDICT_FALSE(num_bytes > memory_limit())) {
-      return AppendOverflow(num_bytes);
-    }
+    ARROW_RETURN_NOT_OK(ValidateOverflow(0));
     ARROW_RETURN_NOT_OK(Reserve(length));
     for (int64_t i = 0; i < length; ++i) {
       offsets_builder_.UnsafeAppend(static_cast<offset_type>(num_bytes));
@@ -232,6 +230,16 @@ class BaseBinaryBuilder : public ArrayBuilder {
     value_data_builder_.Reset();
   }
 
+  Status ValidateOverflow(int64_t new_bytes) {
+    auto new_size = value_data_builder_.length() + new_bytes;
+    if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
+      return Status::CapacityError("array cannot contain more than ", memory_limit(),
+                                   " bytes, have ", new_size);
+    } else {
+      return Status::OK();
+    }
+  }
+
   Status Resize(int64_t capacity) override {
     // XXX Why is this check necessary?  There is no reason to disallow, say,
     // binary arrays with more than 2**31 empty or null values.
@@ -249,12 +257,8 @@ class BaseBinaryBuilder : public ArrayBuilder {
   /// \brief Ensures there is enough allocated capacity to append the indicated
   /// number of bytes to the value data buffer without additional allocations
   Status ReserveData(int64_t elements) {
-    const int64_t size = value_data_length() + elements;
-    ARROW_RETURN_IF(size > memory_limit(),
-                    Status::CapacityError("Cannot reserve capacity larger than ",
-                                          memory_limit(), " bytes"));
-    return (size > value_data_capacity()) ? value_data_builder_.Reserve(elements)
-                                          : Status::OK();
+    ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
+    return value_data_builder_.Reserve(elements);
   }
 
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
@@ -317,16 +321,9 @@ class BaseBinaryBuilder : public ArrayBuilder {
   TypedBufferBuilder<offset_type> offsets_builder_;
   TypedBufferBuilder<uint8_t> value_data_builder_;
 
-  Status AppendOverflow(int64_t num_bytes) {
-    return Status::CapacityError("array cannot contain more than ", memory_limit(),
-                                 " bytes, have ", num_bytes);
-  }
-
   Status AppendNextOffset() {
     const int64_t num_bytes = value_data_builder_.length();
-    if (ARROW_PREDICT_FALSE(num_bytes > memory_limit())) {
-      return AppendOverflow(num_bytes);
-    }
+    ARROW_RETURN_NOT_OK(ValidateOverflow(0));
     return offsets_builder_.Append(static_cast<offset_type>(num_bytes));
   }
 
@@ -462,6 +459,23 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
     byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
   }
 
+  Status ValidateOverflow(int64_t new_bytes) const {
+    auto new_size = byte_builder_.length() + new_bytes;
+    if (ARROW_PREDICT_FALSE(new_size > memory_limit())) {
+      return Status::CapacityError("array cannot contain more than ", memory_limit(),
+                                   " bytes, have ", new_size);
+    } else {
+      return Status::OK();
+    }
+  }
+
+  /// \brief Ensures there is enough allocated capacity to append the indicated
+  /// number of bytes to the value data buffer without additional allocations
+  Status ReserveData(int64_t elements) {
+    ARROW_RETURN_NOT_OK(ValidateOverflow(elements));
+    return byte_builder_.Reserve(elements);
+  }
+
   void Reset() override;
   Status Resize(int64_t capacity) override;
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
diff --git a/cpp/src/arrow/array/builder_nested.cc b/cpp/src/arrow/array/builder_nested.cc
index b8af62fab14..c3786a8fab4 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -54,6 +54,18 @@ MapBuilder::MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& ke
     : MapBuilder(pool, key_builder, item_builder,
                  map(key_builder->type(), item_builder->type(), keys_sorted)) {}
 
+MapBuilder::MapBuilder(MemoryPool* pool,
+                       const std::shared_ptr<ArrayBuilder>& struct_builder,
+                       const std::shared_ptr<DataType>& type)
+    : ArrayBuilder(pool) {
+  auto map_type = internal::checked_cast<const MapType*>(type.get());
+  keys_sorted_ = map_type->keys_sorted();
+  key_builder_ = struct_builder->child_builder(0);
+  item_builder_ = struct_builder->child_builder(1);
+  list_builder_ =
+      std::make_shared<ListBuilder>(pool, struct_builder, struct_builder->type());
+}
+
 Status MapBuilder::Resize(int64_t capacity) {
   RETURN_NOT_OK(list_builder_->Resize(capacity));
   capacity_ = list_builder_->capacity();
@@ -170,6 +182,19 @@ Status FixedSizeListBuilder::AppendNulls(int64_t length) {
   return value_builder_->AppendNulls(list_size_ * length);
 }
 
+Status FixedSizeListBuilder::ValidateOverflow(int64_t new_elements) {
+  auto new_length = value_builder_->length() + new_elements;
+  if (new_elements != list_size_) {
+    return Status::Invalid("Length of item not correct: expected ", list_size_,
+                           " but got array of size ", new_elements);
+  }
+  if (new_length > maximum_elements()) {
+    return Status::CapacityError("array cannot contain more than ", maximum_elements(),
+                                 " elements, have ", new_elements);
+  }
+  return Status::OK();
+}
+
 Status FixedSizeListBuilder::Resize(int64_t capacity) {
   RETURN_NOT_OK(CheckCapacity(capacity));
   return ArrayBuilder::Resize(capacity);
diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h
index cd6fadfcc2f..b8948403acc 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -100,7 +100,7 @@ class BaseListBuilder : public ArrayBuilder {
 
   Status AppendNulls(int64_t length) final {
     ARROW_RETURN_NOT_OK(Reserve(length));
-    ARROW_RETURN_NOT_OK(CheckNextOffset());
+    ARROW_RETURN_NOT_OK(ValidateOverflow(0));
     UnsafeAppendToBitmap(length, false);
     const int64_t num_values = value_builder_->length();
     for (int64_t i = 0; i < length; ++i) {
@@ -131,6 +131,16 @@ class BaseListBuilder : public ArrayBuilder {
     return Status::OK();
   }
 
+  Status ValidateOverflow(int64_t new_elements) const {
+    auto new_length = value_builder_->length() + new_elements;
+    if (ARROW_PREDICT_FALSE(new_length > maximum_elements())) {
+      return Status::CapacityError("List array cannot contain more than ",
+                                   maximum_elements(), " elements, have ", new_elements);
+    } else {
+      return Status::OK();
+    }
+  }
+
   ArrayBuilder* value_builder() const { return value_builder_.get(); }
 
   // Cannot make this a static attribute because of linking issues
@@ -147,17 +157,8 @@ class BaseListBuilder : public ArrayBuilder {
   std::shared_ptr<ArrayBuilder> value_builder_;
   std::shared_ptr<Field> value_field_;
 
-  Status CheckNextOffset() const {
-    const int64_t num_values = value_builder_->length();
-    ARROW_RETURN_IF(
-        num_values > maximum_elements(),
-        Status::CapacityError("List array cannot contain more than ", maximum_elements(),
-                              " child elements,", " have ", num_values));
-    return Status::OK();
-  }
-
   Status AppendNextOffset() {
-    ARROW_RETURN_NOT_OK(CheckNextOffset());
+    ARROW_RETURN_NOT_OK(ValidateOverflow(0));
     const int64_t num_values = value_builder_->length();
     return offsets_builder_.Append(static_cast<offset_type>(num_values));
   }
@@ -227,6 +228,9 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
   MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& key_builder,
              const std::shared_ptr<ArrayBuilder>& item_builder, bool keys_sorted = false);
 
+  MapBuilder(MemoryPool* pool, const std::shared_ptr<ArrayBuilder>& item_builder,
+             const std::shared_ptr<DataType>& type);
+
   Status Resize(int64_t capacity) override;
   void Reset() override;
   Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
@@ -276,6 +280,10 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
     return map(key_builder_->type(), item_builder_->type(), keys_sorted_);
   }
 
+  Status ValidateOverflow(int64_t new_elements) {
+    return list_builder_->ValidateOverflow(new_elements);
+  }
+
  protected:
   inline Status AdjustStructBuilderLength();
 
@@ -343,12 +351,19 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
   /// automatically.
   Status AppendNulls(int64_t length) final;
 
+  Status ValidateOverflow(int64_t new_elements);
+
   ArrayBuilder* value_builder() const { return value_builder_.get(); }
 
   std::shared_ptr<DataType> type() const override {
     return fixed_size_list(value_field_->WithType(value_builder_->type()), list_size_);
   }
 
+  // Cannot make this a static attribute because of linking issues
+  static constexpr int64_t maximum_elements() {
+    return std::numeric_limits<FixedSizeListType::offset_type>::max() - 1;
+  }
+
  protected:
   std::shared_ptr<Field> value_field_;
   const int32_t list_size_;
diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h
index e6b1baa5879..cc907fb6b8a 100644
--- a/cpp/src/arrow/array/builder_primitive.h
+++ b/cpp/src/arrow/array/builder_primitive.h
@@ -31,6 +31,9 @@ namespace arrow {
 class ARROW_EXPORT NullBuilder : public ArrayBuilder {
  public:
   explicit NullBuilder(MemoryPool* pool = default_memory_pool()) : ArrayBuilder(pool) {}
+  explicit NullBuilder(const std::shared_ptr<DataType>& type,
+                       MemoryPool* pool = default_memory_pool())
+      : NullBuilder(pool) {}
 
   /// \brief Append the specified number of null elements
   Status AppendNulls(int64_t length) final {
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 52a3f334d4e..8560fa2d6f4 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -23,6 +23,7 @@
 #include "arrow/buffer.h"
 #include "arrow/python/pyarrow.h"
 #include "arrow/python/visibility.h"
+#include "arrow/result.h"
 #include "arrow/util/macros.h"
 
 namespace arrow {
@@ -188,84 +189,80 @@ class ARROW_PYTHON_EXPORT OwnedRefNoGIL : public OwnedRef {
 struct PyBytesView {
   const char* bytes;
   Py_ssize_t size;
+  bool is_utf8;
 
-  PyBytesView() : bytes(NULLPTR), size(0), ref(NULLPTR) {}
-
-  // View the given Python object as binary-like, i.e. bytes
-  Status FromBinary(PyObject* obj) { return FromBinary(obj, "a bytes object"); }
-
-  Status FromString(PyObject* obj) {
-    bool ignored = false;
-    return FromString(obj, false, &ignored);
+  static Result<PyBytesView> FromString(PyObject* obj, bool check_utf8 = false) {
+    PyBytesView self;
+    ARROW_RETURN_NOT_OK(self.ParseString(obj, check_utf8));
+    return std::move(self);
   }
 
-  Status FromString(PyObject* obj, bool* is_utf8) {
-    return FromString(obj, true, is_utf8);
+  static Result<PyBytesView> FromUnicode(PyObject* obj) {
+    PyBytesView self;
+    ARROW_RETURN_NOT_OK(self.ParseUnicode(obj));
+    return std::move(self);
   }
 
-  Status FromUnicode(PyObject* obj) {
-    Py_ssize_t size;
-    // The utf-8 representation is cached on the unicode object
-    const char* data = PyUnicode_AsUTF8AndSize(obj, &size);
-    RETURN_IF_PYERROR();
-    this->bytes = data;
-    this->size = size;
-    this->ref.reset();
-    return Status::OK();
+  static Result<PyBytesView> FromBinary(PyObject* obj) {
+    PyBytesView self;
+    ARROW_RETURN_NOT_OK(self.ParseBinary(obj));
+    return std::move(self);
   }
 
- protected:
-  PyBytesView(const char* b, Py_ssize_t s, PyObject* obj = NULLPTR)
-      : bytes(b), size(s), ref(obj) {}
-
   // View the given Python object as string-like, i.e. str or (utf8) bytes
-  Status FromString(PyObject* obj, bool check_utf8, bool* is_utf8) {
+  Status ParseString(PyObject* obj, bool check_utf8 = false) {
     if (PyUnicode_Check(obj)) {
-      *is_utf8 = true;
-      return FromUnicode(obj);
+      return ParseUnicode(obj);
     } else {
-      ARROW_RETURN_NOT_OK(FromBinary(obj, "a string or bytes object"));
+      ARROW_RETURN_NOT_OK(ParseBinary(obj));
       if (check_utf8) {
         // Check the bytes are utf8 utf-8
         OwnedRef decoded(PyUnicode_FromStringAndSize(bytes, size));
         if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) {
-          *is_utf8 = true;
+          is_utf8 = true;
         } else {
-          *is_utf8 = false;
           PyErr_Clear();
+          is_utf8 = false;
         }
-      } else {
-        *is_utf8 = false;
       }
       return Status::OK();
     }
   }
 
-  Status FromBinary(PyObject* obj, const char* expected_msg) {
+  // View the given Python object as unicode string
+  Status ParseUnicode(PyObject* obj) {
+    // The utf-8 representation is cached on the unicode object
+    bytes = PyUnicode_AsUTF8AndSize(obj, &size);
+    RETURN_IF_PYERROR();
+    is_utf8 = true;
+    return Status::OK();
+  }
+
+  // View the given Python object as binary-like, i.e. bytes
+  Status ParseBinary(PyObject* obj) {
     if (PyBytes_Check(obj)) {
-      this->bytes = PyBytes_AS_STRING(obj);
-      this->size = PyBytes_GET_SIZE(obj);
-      this->ref.reset();
-      return Status::OK();
+      bytes = PyBytes_AS_STRING(obj);
+      size = PyBytes_GET_SIZE(obj);
+      is_utf8 = false;
     } else if (PyByteArray_Check(obj)) {
-      this->bytes = PyByteArray_AS_STRING(obj);
-      this->size = PyByteArray_GET_SIZE(obj);
-      this->ref.reset();
-      return Status::OK();
+      bytes = PyByteArray_AS_STRING(obj);
+      size = PyByteArray_GET_SIZE(obj);
+      is_utf8 = false;
     } else if (PyMemoryView_Check(obj)) {
-      PyObject* contig_view = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
+      PyObject* ref = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C');
       RETURN_IF_PYERROR();
-      this->ref.reset(contig_view);
-      Py_buffer* buf = PyMemoryView_GET_BUFFER(contig_view);
-      this->bytes = reinterpret_cast<const char*>(buf->buf);
-      this->size = buf->len;
-      return Status::OK();
+      Py_buffer* buffer = PyMemoryView_GET_BUFFER(ref);
+      bytes = reinterpret_cast<const char*>(buffer->buf);
+      size = buffer->len;
+      is_utf8 = false;
     } else {
-      return Status::TypeError("Expected ", expected_msg, ", got a '",
-                               Py_TYPE(obj)->tp_name, "' object");
+      return Status::TypeError("Expected bytes, got a '", Py_TYPE(obj)->tp_name,
+                               "' object");
     }
+    return Status::OK();
   }
 
+ protected:
   OwnedRef ref;
 };
 
diff --git a/cpp/src/arrow/python/datetime.cc b/cpp/src/arrow/python/datetime.cc
index 4eeab7f5a69..07df5e76b45 100644
--- a/cpp/src/arrow/python/datetime.cc
+++ b/cpp/src/arrow/python/datetime.cc
@@ -419,6 +419,15 @@ Result<std::string> TzinfoToString(PyObject* tzinfo) {
     return PyTZInfo_utcoffset_hhmm(tzinfo);
   }
 
+  // try to look up zone attribute
+  if (PyObject_HasAttrString(tzinfo, "zone")) {
+    OwnedRef zone(PyObject_GetAttrString(tzinfo, "zone"));
+    RETURN_IF_PYERROR();
+    std::string result;
+    RETURN_NOT_OK(internal::PyUnicode_AsStdString(zone.obj(), &result));
+    return result;
+  }
+
   // attempt to call tzinfo.tzname(None)
   OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O", Py_None));
   RETURN_IF_PYERROR();
diff --git a/cpp/src/arrow/python/inference.cc b/cpp/src/arrow/python/inference.cc
index d1ce2c2f797..a75a887693c 100644
--- a/cpp/src/arrow/python/inference.cc
+++ b/cpp/src/arrow/python/inference.cc
@@ -620,39 +620,23 @@ class TypeInferrer {
 };
 
 // Non-exhaustive type inference
-Status InferArrowType(PyObject* obj, PyObject* mask, bool pandas_null_sentinels,
-                      std::shared_ptr<DataType>* out_type) {
+Result<std::shared_ptr<DataType>> InferArrowType(PyObject* obj, PyObject* mask,
+                                                 bool pandas_null_sentinels) {
   if (pandas_null_sentinels) {
     // ARROW-842: If pandas is not installed then null checks will be less
     // comprehensive, but that is okay.
     internal::InitPandasStaticData();
   }
 
+  std::shared_ptr<DataType> out_type;
   TypeInferrer inferrer(pandas_null_sentinels);
   RETURN_NOT_OK(inferrer.VisitSequence(obj, mask));
-  RETURN_NOT_OK(inferrer.GetType(out_type));
-  if (*out_type == nullptr) {
+  RETURN_NOT_OK(inferrer.GetType(&out_type));
+  if (out_type == nullptr) {
     return Status::TypeError("Unable to determine data type");
+  } else {
+    return std::move(out_type);
   }
-
-  return Status::OK();
-}
-
-Status InferArrowTypeAndSize(PyObject* obj, PyObject* mask, bool pandas_null_sentinels,
-                             int64_t* size, std::shared_ptr<DataType>* out_type) {
-  if (!PySequence_Check(obj)) {
-    return Status::TypeError("Object is not a sequence");
-  }
-  *size = static_cast<int64_t>(PySequence_Size(obj));
-
-  // For 0-length sequences, refuse to guess
-  if (*size == 0) {
-    *out_type = null();
-    return Status::OK();
-  }
-  RETURN_NOT_OK(InferArrowType(obj, mask, pandas_null_sentinels, out_type));
-
-  return Status::OK();
 }
 
 ARROW_PYTHON_EXPORT
diff --git a/cpp/src/arrow/python/inference.h b/cpp/src/arrow/python/inference.h
index 74d1b78161c..eff18362934 100644
--- a/cpp/src/arrow/python/inference.h
+++ b/cpp/src/arrow/python/inference.h
@@ -44,15 +44,9 @@ namespace py {
 /// \param[in] mask an optional mask where True values are null. May
 /// be nullptr
 /// \param[in] pandas_null_sentinels use pandas's null value markers
-/// \param[out] out_type the inferred type
 ARROW_PYTHON_EXPORT
-arrow::Status InferArrowType(PyObject* obj, PyObject* mask, bool pandas_null_sentinels,
-                             std::shared_ptr<arrow::DataType>* out_type);
-
-ARROW_PYTHON_EXPORT
-arrow::Status InferArrowTypeAndSize(PyObject* obj, PyObject* mask,
-                                    bool pandas_null_sentinels, int64_t* size,
-                                    std::shared_ptr<arrow::DataType>* out_type);
+Result<std::shared_ptr<arrow::DataType>> InferArrowType(PyObject* obj, PyObject* mask,
+                                                        bool pandas_null_sentinels);
 
 /// Checks whether the passed Python object is a boolean scalar
 ARROW_PYTHON_EXPORT
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index af608dfc360..2847c4aa6b5 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -314,11 +314,11 @@ Status NumPyConverter::Convert() {
     PyConversionOptions py_options;
     py_options.type = type_;
     py_options.from_pandas = from_pandas_;
-    std::shared_ptr<ChunkedArray> res;
-    RETURN_NOT_OK(ConvertPySequence(reinterpret_cast<PyObject*>(arr_),
-                                    reinterpret_cast<PyObject*>(mask_), py_options,
-                                    &res));
-    out_arrays_ = res->chunks();
+    ARROW_ASSIGN_OR_RAISE(
+        auto chunked_array,
+        ConvertPySequence(reinterpret_cast<PyObject*>(arr_),
+                          reinterpret_cast<PyObject*>(mask_), py_options, pool_));
+    out_arrays_ = chunked_array->chunks();
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/python/python_test.cc b/cpp/src/arrow/python/python_test.cc
index b21c16af50a..80bda384bde 100644
--- a/cpp/src/arrow/python/python_test.cc
+++ b/cpp/src/arrow/python/python_test.cc
@@ -329,8 +329,7 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
   ASSERT_EQ(PyList_SetItem(list, 1, integer), 0);
   ASSERT_EQ(PyList_SetItem(list, 2, doub), 0);
 
-  std::shared_ptr<ChunkedArray> arr;
-  ASSERT_RAISES(TypeError, ConvertPySequence(list, {}, &arr));
+  ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, {}));
 }
 
 TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
@@ -422,17 +421,18 @@ TEST_F(DecimalTest, TestNoneAndNaN) {
   ASSERT_EQ(0, PyList_SetItem(list, 2, missing_value2));
   ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3));
 
-  std::shared_ptr<ChunkedArray> arr, arr_from_pandas;
   PyConversionOptions options;
-  ASSERT_RAISES(TypeError, ConvertPySequence(list, options, &arr));
+  ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, options));
 
   options.from_pandas = true;
-  ASSERT_OK(ConvertPySequence(list, options, &arr_from_pandas));
-  auto c0 = arr_from_pandas->chunk(0);
-  ASSERT_TRUE(c0->IsValid(0));
-  ASSERT_TRUE(c0->IsNull(1));
-  ASSERT_TRUE(c0->IsNull(2));
-  ASSERT_TRUE(c0->IsNull(3));
+  ASSERT_OK_AND_ASSIGN(auto chunked, ConvertPySequence(list, nullptr, options));
+  ASSERT_EQ(chunked->num_chunks(), 1);
+
+  auto arr = chunked->chunk(0);
+  ASSERT_TRUE(arr->IsValid(0));
+  ASSERT_TRUE(arr->IsNull(1));
+  ASSERT_TRUE(arr->IsNull(2));
+  ASSERT_TRUE(arr->IsNull(3));
 }
 
 TEST_F(DecimalTest, TestMixedPrecisionAndScale) {
@@ -451,8 +451,7 @@ TEST_F(DecimalTest, TestMixedPrecisionAndScale) {
     ASSERT_EQ(0, result);
   }
 
-  std::shared_ptr<ChunkedArray> arr;
-  ASSERT_OK(ConvertPySequence(list, {}, &arr));
+  ASSERT_OK_AND_ASSIGN(auto arr, ConvertPySequence(list, nullptr, {}))
   const auto& type = checked_cast<const DecimalType&>(*arr->type());
 
   int32_t expected_precision = 9;
@@ -476,9 +475,7 @@ TEST_F(DecimalTest, TestMixedPrecisionAndScaleSequenceConvert) {
   ASSERT_EQ(PyList_SetItem(list, 0, value1), 0);
   ASSERT_EQ(PyList_SetItem(list, 1, value2), 0);
 
-  std::shared_ptr<ChunkedArray> arr;
-  ASSERT_OK(ConvertPySequence(list, {}, &arr));
-
+  ASSERT_OK_AND_ASSIGN(auto arr, ConvertPySequence(list, nullptr, {}));
   const auto& type = checked_cast<const Decimal128Type&>(*arr->type());
   ASSERT_EQ(3, type.precision());
   ASSERT_EQ(3, type.scale());
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index 849c474ded3..252577eef01 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -35,6 +35,7 @@
 #include "arrow/type.h"
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
+#include "arrow/util/converter.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/int_util_internal.h"
 #include "arrow/util/logging.h"
@@ -46,48 +47,60 @@
 #include "arrow/python/iterators.h"
 #include "arrow/python/numpy_convert.h"
 #include "arrow/python/type_traits.h"
+#include "arrow/visitor_inline.h"
 
 namespace arrow {
 
 using internal::checked_cast;
 using internal::checked_pointer_cast;
 
-namespace py {
-
-// ----------------------------------------------------------------------
-// NullCoding
+using internal::Converter;
+using internal::DictionaryConverter;
+using internal::ListConverter;
+using internal::PrimitiveConverter;
+using internal::StructConverter;
 
-enum class NullCoding : char { NONE_ONLY, PANDAS_SENTINELS };
+using internal::MakeChunker;
+using internal::MakeConverter;
 
-template <NullCoding kind>
-struct NullChecker {};
+namespace py {
 
-template <>
-struct NullChecker<NullCoding::NONE_ONLY> {
-  static inline bool Check(PyObject* obj) { return obj == Py_None; }
-};
+// Utility for converting single python objects to their intermediate C representations
+// which can be fed to the typed builders
+class PyValue {
+ public:
+  // Type aliases for shorter signature definitions
+  using I = PyObject*;
+  using O = PyConversionOptions;
+
+  // Used for null checking before actually converting the values
+  static bool IsNull(const O& options, I obj) {
+    if (options.from_pandas) {
+      return internal::PandasObjectIsNull(obj);
+    } else {
+      return obj == Py_None;
+    }
+  }
 
-template <>
-struct NullChecker<NullCoding::PANDAS_SENTINELS> {
-  static inline bool Check(PyObject* obj) { return internal::PandasObjectIsNull(obj); }
-};
+  // Used for post-conversion numpy NaT sentinel checking
+  static bool IsNaT(const TimestampType*, int64_t value) {
+    return internal::npy_traits<NPY_DATETIME>::isnull(value);
+  }
 
-// ----------------------------------------------------------------------
-// ValueConverters
-//
-// Typed conversion logic for single python objects are encapsulated in
-// ValueConverter structs using SFINAE for specialization.
-//
-// The FromPython medthod is responsible to convert the python object to the
-// C++ value counterpart which can be directly appended to the ArrayBuilder or
-// Scalar can be constructed from.
+  // Used for post-conversion numpy NaT sentinel checking
+  static bool IsNaT(const DurationType*, int64_t value) {
+    return internal::npy_traits<NPY_TIMEDELTA>::isnull(value);
+  }
 
-template <typename Type, typename Enable = void>
-struct ValueConverter {};
+  static Result<std::nullptr_t> Convert(const NullType*, const O&, I obj) {
+    if (obj == Py_None) {
+      return nullptr;
+    } else {
+      return Status::Invalid("Invalid null value");
+    }
+  }
 
-template <>
-struct ValueConverter<BooleanType> {
-  static inline Result<bool> FromPython(PyObject* obj) {
+  static Result<bool> Convert(const BooleanType*, const O&, I obj) {
     if (obj == Py_True) {
       return true;
     } else if (obj == Py_False) {
@@ -98,38 +111,28 @@ struct ValueConverter<BooleanType> {
       return internal::InvalidValue(obj, "tried to convert to boolean");
     }
   }
-};
 
-template <typename Type>
-struct ValueConverter<Type, enable_if_integer<Type>> {
-  using ValueType = typename Type::c_type;
-
-  static inline Result<ValueType> FromPython(PyObject* obj) {
-    ValueType value;
-    arrow::Status s_ = internal::CIntFromPython(obj, &value);
-    if (!s_.ok() && !internal::PyIntScalar_Check(obj)) {
+  template <typename T>
+  static enable_if_integer<T, Result<typename T::c_type>> Convert(const T*, const O&,
+                                                                  I obj) {
+    typename T::c_type value;
+    auto status = internal::CIntFromPython(obj, &value);
+    if (ARROW_PREDICT_TRUE(status.ok())) {
+      return value;
+    } else if (!internal::PyIntScalar_Check(obj)) {
       return internal::InvalidValue(obj, "tried to convert to int");
     } else {
-      RETURN_NOT_OK(s_);
+      return status;
     }
-    return value;
   }
-};
-
-template <>
-struct ValueConverter<HalfFloatType> {
-  using ValueType = typename HalfFloatType::c_type;
 
-  static inline Result<ValueType> FromPython(PyObject* obj) {
-    ValueType value;
+  static Result<uint16_t> Convert(const HalfFloatType*, const O&, I obj) {
+    uint16_t value;
     RETURN_NOT_OK(PyFloat_AsHalf(obj, &value));
     return value;
   }
-};
 
-template <>
-struct ValueConverter<FloatType> {
-  static inline Result<float> FromPython(PyObject* obj) {
+  static Result<float> Convert(const FloatType*, const O&, I obj) {
     float value;
     if (internal::PyFloatScalar_Check(obj)) {
       value = static_cast<float>(PyFloat_AsDouble(obj));
@@ -141,11 +144,8 @@ struct ValueConverter<FloatType> {
     }
     return value;
   }
-};
 
-template <>
-struct ValueConverter<DoubleType> {
-  static inline Result<double> FromPython(PyObject* obj) {
+  static Result<double> Convert(const DoubleType*, const O&, I obj) {
     double value;
     if (PyFloat_Check(obj)) {
       value = PyFloat_AS_DOUBLE(obj);
@@ -160,11 +160,14 @@ struct ValueConverter<DoubleType> {
     }
     return value;
   }
-};
 
-template <>
-struct ValueConverter<Date32Type> {
-  static inline Result<int32_t> FromPython(PyObject* obj) {
+  static Result<Decimal128> Convert(const Decimal128Type* type, const O&, I obj) {
+    Decimal128 value;
+    RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value));
+    return value;
+  }
+
+  static Result<int32_t> Convert(const Date32Type*, const O&, I obj) {
     int32_t value;
     if (PyDate_Check(obj)) {
       auto pydate = reinterpret_cast<PyDateTime_Date*>(obj);
@@ -175,16 +178,14 @@ struct ValueConverter<Date32Type> {
     }
     return value;
   }
-};
 
-template <>
-struct ValueConverter<Date64Type> {
-  static inline Result<int64_t> FromPython(PyObject* obj) {
+  static Result<int64_t> Convert(const Date64Type*, const O&, I obj) {
     int64_t value;
     if (PyDateTime_Check(obj)) {
       auto pydate = reinterpret_cast<PyDateTime_DateTime*>(obj);
       value = internal::PyDateTime_to_ms(pydate);
       // Truncate any intraday milliseconds
+      // TODO: introduce an option for this
       value -= value % 86400000LL;
     } else if (PyDate_Check(obj)) {
       auto pydate = reinterpret_cast<PyDateTime_Date*>(obj);
@@ -195,16 +196,11 @@ struct ValueConverter<Date64Type> {
     }
     return value;
   }
-};
 
-template <>
-struct ValueConverter<Time32Type> {
-  static inline Result<int32_t> FromPython(PyObject* obj, TimeUnit::type unit,
-                                           bool /*ignore_timezone*/) {
+  static Result<int32_t> Convert(const Time32Type* type, const O&, I obj) {
     int32_t value;
     if (PyTime_Check(obj)) {
-      // TODO(kszucs): consider to raise if a timezone aware time object is encountered
-      switch (unit) {
+      switch (type->unit()) {
         case TimeUnit::SECOND:
           value = static_cast<int32_t>(internal::PyTime_to_s(obj));
           break;
@@ -215,21 +211,15 @@ struct ValueConverter<Time32Type> {
           return Status::UnknownError("Invalid time unit");
       }
     } else {
-      // TODO(kszucs): validate maximum value?
       RETURN_NOT_OK(internal::CIntFromPython(obj, &value, "Integer too large for int32"));
     }
     return value;
   }
-};
 
-template <>
-struct ValueConverter<Time64Type> {
-  static inline Result<int64_t> FromPython(PyObject* obj, TimeUnit::type unit,
-                                           bool /*ignore_timezone=*/) {
+  static Result<int64_t> Convert(const Time64Type* type, const O&, I obj) {
     int64_t value;
     if (PyTime_Check(obj)) {
-      // TODO(kszucs): consider to raise if a timezone aware time object is encountered
-      switch (unit) {
+      switch (type->unit()) {
         case TimeUnit::MICRO:
           value = internal::PyTime_to_us(obj);
           break;
@@ -240,25 +230,21 @@ struct ValueConverter<Time64Type> {
           return Status::UnknownError("Invalid time unit");
       }
     } else {
-      // TODO(kszucs): validate maximum value?
       RETURN_NOT_OK(internal::CIntFromPython(obj, &value, "Integer too large for int64"));
     }
     return value;
   }
-};
 
-template <>
-struct ValueConverter<TimestampType> {
-  static inline Result<int64_t> FromPython(PyObject* obj, TimeUnit::type unit,
-                                           bool ignore_timezone) {
-    int64_t value;
+  static Result<int64_t> Convert(const TimestampType* type, const O& options, I obj) {
+    int64_t value, offset;
     if (PyDateTime_Check(obj)) {
-      ARROW_ASSIGN_OR_RAISE(int64_t offset, internal::PyDateTime_utcoffset_s(obj));
-      if (ignore_timezone) {
+      if (ARROW_PREDICT_FALSE(options.ignore_timezone)) {
         offset = 0;
+      } else {
+        ARROW_ASSIGN_OR_RAISE(offset, internal::PyDateTime_utcoffset_s(obj));
       }
       auto dt = reinterpret_cast<PyDateTime_DateTime*>(obj);
-      switch (unit) {
+      switch (type->unit()) {
         case TimeUnit::SECOND:
           value = internal::PyDateTime_to_s(dt) - offset;
           break;
@@ -282,38 +268,26 @@ struct ValueConverter<TimestampType> {
         default:
           return Status::UnknownError("Invalid time unit");
       }
+    } else if (PyArray_CheckAnyScalarExact(obj)) {
+      // validate that the numpy scalar has np.datetime64 dtype
+      std::shared_ptr<DataType> numpy_type;
+      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &numpy_type));
+      if (!numpy_type->Equals(*type)) {
+        return Status::NotImplemented("Expected np.datetime64 but got: ",
+                                      numpy_type->ToString());
+      }
+      return reinterpret_cast<PyDatetimeScalarObject*>(obj)->obval;
     } else {
       RETURN_NOT_OK(internal::CIntFromPython(obj, &value));
     }
     return value;
   }
 
-  static inline Result<int64_t> FromNumpy(PyObject* obj, TimeUnit::type unit) {
-    // validate that the numpy scalar has np.datetime64 dtype
-    std::shared_ptr<DataType> type;
-    RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &type));
-    if (type->id() != TimestampType::type_id) {
-      // TODO(kszucs): the message should highlight the received numpy dtype
-      return Status::Invalid("Expected np.datetime64 but got: ", type->ToString());
-    }
-    // validate that the time units are matching
-    if (unit != checked_cast<const TimestampType&>(*type).unit()) {
-      return Status::NotImplemented(
-          "Cannot convert NumPy np.datetime64 objects with differing unit");
-    }
-    // convert the numpy value
-    return reinterpret_cast<PyDatetimeScalarObject*>(obj)->obval;
-  }
-};
-
-template <>
-struct ValueConverter<DurationType> {
-  static inline Result<int64_t> FromPython(PyObject* obj, TimeUnit::type unit,
-                                           bool /*ignore_timezone*/) {
+  static Result<int64_t> Convert(const DurationType* type, const O&, I obj) {
     int64_t value;
     if (PyDelta_Check(obj)) {
       auto dt = reinterpret_cast<PyDateTime_Delta*>(obj);
-      switch (unit) {
+      switch (type->unit()) {
         case TimeUnit::SECOND:
           value = internal::PyDelta_to_s(dt);
           break;
@@ -329,985 +303,655 @@ struct ValueConverter<DurationType> {
         default:
           return Status::UnknownError("Invalid time unit");
       }
+    } else if (PyArray_CheckAnyScalarExact(obj)) {
+      // validate that the numpy scalar has np.datetime64 dtype
+      std::shared_ptr<DataType> numpy_type;
+      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &numpy_type));
+      if (!numpy_type->Equals(*type)) {
+        return Status::NotImplemented("Expected np.timedelta64 but got: ",
+                                      numpy_type->ToString());
+      }
+      return reinterpret_cast<PyTimedeltaScalarObject*>(obj)->obval;
     } else {
       RETURN_NOT_OK(internal::CIntFromPython(obj, &value));
     }
     return value;
   }
 
-  static inline Result<int64_t> FromNumpy(PyObject* obj, TimeUnit::type unit) {
-    // validate that the numpy scalar has np.timedelta64 dtype
-    std::shared_ptr<DataType> type;
-    RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &type));
-    if (type->id() != DurationType::type_id) {
-      // TODO(kszucs): the message should highlight the received numpy dtype
-      return Status::Invalid("Expected np.timedelta64 but got: ", type->ToString());
-    }
-    // validate that the time units are matching
-    if (unit != checked_cast<const DurationType&>(*type).unit()) {
-      return Status::NotImplemented(
-          "Cannot convert NumPy np.timedelta64 objects with differing unit");
-    }
-    // convert the numpy value
-    return reinterpret_cast<PyTimedeltaScalarObject*>(obj)->obval;
-  }
-};
-
-template <typename Type>
-struct ValueConverter<Type, enable_if_any_binary<Type>> {
-  static inline Result<PyBytesView> FromPython(PyObject* obj) {
-    PyBytesView view;
-    RETURN_NOT_OK(view.FromString(obj));
-    return std::move(view);
-  }
-};
+  // The binary-like intermediate representation is PyBytesView because it keeps temporary
+  // python objects alive (non-contiguous memoryview) and stores whether the original
+  // object was unicode encoded or not, which is used for unicode -> bytes coersion if
+  // there is a non-unicode object observed.
 
-template <typename Type>
-struct ValueConverter<Type, enable_if_string_like<Type>> {
-  static inline Result<PyBytesView> FromPython(PyObject* obj) {
-    // strict conversion, force output to be unicode / utf8 and validate that
-    // any binary values are utf8
-    bool is_utf8 = false;
-    PyBytesView view;
-
-    RETURN_NOT_OK(view.FromString(obj, &is_utf8));
-    if (!is_utf8) {
-      return internal::InvalidValue(obj, "was not a utf8 string");
-    }
-    return std::move(view);
+  static Status Convert(const BaseBinaryType*, const O&, I obj, PyBytesView& view) {
+    return view.ParseString(obj);
   }
 
-  static inline Result<PyBytesView> FromPython(PyObject* obj, bool* is_utf8) {
-    PyBytesView view;
-
-    // Non-strict conversion; keep track of whether values are unicode or bytes
-    if (PyUnicode_Check(obj)) {
-      *is_utf8 = true;
-      RETURN_NOT_OK(view.FromUnicode(obj));
-    } else {
-      // If not unicode or bytes, FromBinary will error
-      *is_utf8 = false;
-      RETURN_NOT_OK(view.FromBinary(obj));
-    }
-    return std::move(view);
-  }
-};
-
-template <typename Type>
-struct ValueConverter<Type, enable_if_fixed_size_binary<Type>> {
-  static inline Result<PyBytesView> FromPython(PyObject* obj, int32_t byte_width) {
-    PyBytesView view;
-    RETURN_NOT_OK(view.FromString(obj));
-    if (ARROW_PREDICT_FALSE(view.size != byte_width)) {
+  static Status Convert(const FixedSizeBinaryType* type, const O&, I obj,
+                        PyBytesView& view) {
+    ARROW_RETURN_NOT_OK(view.ParseString(obj));
+    if (view.size != type->byte_width()) {
       std::stringstream ss;
-      ss << "expected to be length " << byte_width << " was " << view.size;
+      ss << "expected to be length " << type->byte_width() << " was " << view.size;
       return internal::InvalidValue(obj, ss.str());
     } else {
-      return std::move(view);
+      return Status::OK();
     }
   }
-};
-
-// ----------------------------------------------------------------------
-// Sequence converter base and CRTP "middle" subclasses
-
-class SeqConverter;
-
-// Forward-declare converter factory
-Status GetConverter(const std::shared_ptr<DataType>& type, bool from_pandas,
-                    bool strict_conversions, bool ignore_timezone,
-                    std::unique_ptr<SeqConverter>* out);
-
-// Marshal Python sequence (list, tuple, etc.) to Arrow array
-class SeqConverter {
- public:
-  virtual ~SeqConverter() = default;
-
-  // Initialize the sequence converter with an ArrayBuilder created
-  // externally. The reason for this interface is that we have
-  // arrow::MakeBuilder which also creates child builders for nested types, so
-  // we have to pass in the child builders to child SeqConverter in the case of
-  // converting Python objects to Arrow nested types
-  virtual Status Init(ArrayBuilder* builder) = 0;
-
-  // Append a single null value to the builder
-  virtual Status AppendNull() = 0;
 
-  // Append a valid value
-  virtual Status AppendValue(PyObject* seq) = 0;
-
-  // Append a single python object handling Null values
-  virtual Status Append(PyObject* seq) = 0;
-
-  // Append the contents of a Python sequence to the underlying builder,
-  // virtual version
-  virtual Status Extend(PyObject* seq, int64_t size) = 0;
-
-  // Append the contents of a Python sequence to the underlying builder,
-  // virtual version
-  virtual Status ExtendMasked(PyObject* seq, PyObject* mask, int64_t size) = 0;
-
-  virtual Status Close() {
-    if (chunks_.size() == 0 || builder_->length() > 0) {
-      std::shared_ptr<Array> last_chunk;
-      RETURN_NOT_OK(builder_->Finish(&last_chunk));
-      chunks_.emplace_back(std::move(last_chunk));
+  template <typename T>
+  static enable_if_string<T, Status> Convert(const T*, const O& options, I obj,
+                                             PyBytesView& view) {
+    if (options.strict) {
+      // Strict conversion, force output to be unicode / utf8 and validate that
+      // any binary values are utf8
+      ARROW_RETURN_NOT_OK(view.ParseString(obj, true));
+      if (!view.is_utf8) {
+        return internal::InvalidValue(obj, "was not a utf8 string");
+      }
+      return Status::OK();
+    } else {
+      // Non-strict conversion; keep track of whether values are unicode or bytes
+      return view.ParseString(obj);
     }
-    return Status::OK();
   }
 
-  virtual Status GetResult(std::shared_ptr<ChunkedArray>* out) {
-    // Still some accumulated data in the builder. If there are no chunks, we
-    // always call Finish to deal with the edge case where a size-0 sequence
-    // was converted with a specific output type, like array([], type=t)
-    RETURN_NOT_OK(Close());
-    *out = std::make_shared<ChunkedArray>(this->chunks_, builder_->type());
-    return Status::OK();
+  static Result<bool> Convert(const DataType* type, const O&, I obj) {
+    return Status::NotImplemented("PyValue::Convert is not implemented for type ", type);
   }
-
-  ArrayBuilder* builder() const { return builder_; }
-
-  int num_chunks() const { return static_cast<int>(chunks_.size()); }
-
- protected:
-  ArrayBuilder* builder_;
-  bool unfinished_builder_;
-  std::vector<std::shared_ptr<Array>> chunks_;
 };
 
-template <typename Type, NullCoding null_coding = NullCoding::NONE_ONLY>
-class TypedConverter : public SeqConverter {
- public:
-  using BuilderType = typename TypeTraits<Type>::BuilderType;
-
-  Status Init(ArrayBuilder* builder) override {
-    builder_ = builder;
-    DCHECK_NE(builder_, nullptr);
-    typed_builder_ = checked_cast<BuilderType*>(builder);
-    return Status::OK();
-  }
-
-  // Append a missing item (default implementation)
-  Status AppendNull() override { return this->typed_builder_->AppendNull(); }
-
-  // Append null if the obj is None or pandas null otherwise the valid value
-  Status Append(PyObject* obj) override {
-    return NullChecker<null_coding>::Check(obj) ? AppendNull() : AppendValue(obj);
-  }
-
-  Status Extend(PyObject* obj, int64_t size) override {
-    /// Ensure we've allocated enough space
-    RETURN_NOT_OK(typed_builder_->Reserve(size));
-    // Iterate over the items adding each one
-    return internal::VisitSequence(
-        obj, [this](PyObject* item, bool* /* unused */) { return this->Append(item); });
-  }
-
-  Status ExtendMasked(PyObject* obj, PyObject* mask, int64_t size) override {
-    /// Ensure we've allocated enough space
-    RETURN_NOT_OK(typed_builder_->Reserve(size));
-    // Iterate over the items adding each one
-    return internal::VisitSequenceMasked(
-        obj, mask, [this](PyObject* item, bool is_masked, bool* /* unused */) {
-          if (is_masked) {
-            return this->AppendNull();
-          } else {
-            // This will also apply the null-checking convention in the event
-            // that the value is not masked
-            return this->Append(item);  // perhaps use AppendValue instead?
-          }
-        });
-  }
+template <typename T>
+Status Extend(T* converter, PyObject* values, int64_t size) {
+  /// Ensure we've allocated enough space
+  RETURN_NOT_OK(converter->Reserve(size));
+  // Iterate over the items adding each one
+  return internal::VisitSequence(values, [converter](PyObject* item, bool* /* unused */) {
+    return converter->Append(item);
+  });
+}
 
- protected:
-  BuilderType* typed_builder_;
-};
+// Convert and append a sequence of values masked with a numpy array
+template <typename T>
+Status ExtendMasked(T* converter, PyObject* values, PyObject* mask, int64_t size) {
+  /// Ensure we've allocated enough space
+  RETURN_NOT_OK(converter->Reserve(size));
+  // Iterate over the items adding each one
+  return internal::VisitSequenceMasked(
+      values, mask, [converter](PyObject* item, bool is_masked, bool* /* unused */) {
+        if (is_masked) {
+          return converter->AppendNull();
+        } else {
+          // This will also apply the null-checking convention in the event
+          // that the value is not masked
+          return converter->Append(item);  // perhaps use AppendValue instead?
+        }
+      });
+}
 
-// ----------------------------------------------------------------------
-// Sequence converter for null type
+// The base Converter class is a mixin with predefined behavior and constructors.
+using PyConverter = Converter<PyObject*, PyConversionOptions>;
 
-template <NullCoding null_coding>
-class NullConverter : public TypedConverter<NullType, null_coding> {
- public:
-  Status AppendValue(PyObject* obj) override {
-    return internal::InvalidValue(obj, "converting to null type");
-  }
-};
+template <typename T, typename Enable = void>
+class PyPrimitiveConverter;
 
-// ----------------------------------------------------------------------
-// Sequence converter template for primitive (integer and floating point bool) types
+template <typename T>
+class PyListConverter;
 
-template <typename Type, NullCoding null_coding>
-class PrimitiveConverter : public TypedConverter<Type, null_coding> {
-  Status AppendValue(PyObject* obj) override {
-    ARROW_ASSIGN_OR_RAISE(auto value, ValueConverter<Type>::FromPython(obj));
-    return this->typed_builder_->Append(value);
-  }
-};
+template <typename U, typename Enable = void>
+class PyDictionaryConverter;
 
-// ----------------------------------------------------------------------
-// Sequence converters for temporal types
+class PyStructConverter;
 
-template <typename Type, NullCoding null_coding>
-class TimeConverter : public TypedConverter<Type, null_coding> {
- public:
-  explicit TimeConverter(TimeUnit::type unit, bool ignore_timezone)
-      : unit_(unit), ignore_timezone_(ignore_timezone) {}
-
-  // TODO(kszucs): support numpy values for date and time converters
-  Status AppendValue(PyObject* obj) override {
-    ARROW_ASSIGN_OR_RAISE(auto value,
-                          ValueConverter<Type>::FromPython(obj, unit_, ignore_timezone_));
-    return this->typed_builder_->Append(value);
-  }
+template <typename T, typename Enable = void>
+struct PyConverterTrait;
 
- protected:
-  TimeUnit::type unit_;
-  bool ignore_timezone_;
+template <typename T>
+struct PyConverterTrait<
+    T, enable_if_t<!is_nested_type<T>::value && !is_interval_type<T>::value &&
+                   !is_extension_type<T>::value>> {
+  using type = PyPrimitiveConverter<T>;
 };
 
-// TODO(kszucs): move it to the type_traits
 template <typename T>
-struct NumpyType {};
+struct PyConverterTrait<T, enable_if_list_like<T>> {
+  using type = PyListConverter<T>;
+};
 
 template <>
-struct NumpyType<TimestampType> {
-  static inline bool isnull(int64_t v) {
-    return internal::npy_traits<NPY_DATETIME>::isnull(v);
-  }
+struct PyConverterTrait<StructType> {
+  using type = PyStructConverter;
 };
 
 template <>
-struct NumpyType<DurationType> {
-  static inline bool isnull(int64_t v) {
-    return internal::npy_traits<NPY_TIMEDELTA>::isnull(v);
-  }
+struct PyConverterTrait<DictionaryType> {
+  template <typename T>
+  using dictionary_type = PyDictionaryConverter<T>;
 };
 
-template <typename Type, NullCoding null_coding>
-class TemporalConverter : public TimeConverter<Type, null_coding> {
+template <typename T>
+class PyPrimitiveConverter<T, enable_if_null<T>>
+    : public PrimitiveConverter<T, PyConverter> {
  public:
-  using TimeConverter<Type, null_coding>::TimeConverter;
-
-  Status AppendValue(PyObject* obj) override {
-    int64_t value;
-    if (PyArray_CheckAnyScalarExact(obj)) {
-      // convert np.datetime64 / np.timedelta64 depending on Type
-      ARROW_ASSIGN_OR_RAISE(value, ValueConverter<Type>::FromNumpy(obj, this->unit_));
-      if (NumpyType<Type>::isnull(value)) {
-        // checks numpy NaT sentinel after conversion
-        return this->typed_builder_->AppendNull();
-      }
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      return this->primitive_builder_->AppendNull();
     } else {
       ARROW_ASSIGN_OR_RAISE(
-          value,
-          ValueConverter<Type>::FromPython(
-              obj, this->unit_, TimeConverter<Type, null_coding>::ignore_timezone_));
+          auto converted, PyValue::Convert(this->primitive_type_, this->options_, value));
+      return this->primitive_builder_->Append(converted);
     }
-    return this->typed_builder_->Append(value);
   }
 };
 
-// ----------------------------------------------------------------------
-// Sequence converters for Binary, FixedSizeBinary, String
-
-template <typename Type, NullCoding null_coding>
-class BinaryLikeConverter : public TypedConverter<Type, null_coding> {
+template <typename T>
+class PyPrimitiveConverter<
+    T, enable_if_t<is_boolean_type<T>::value || is_number_type<T>::value ||
+                   is_decimal_type<T>::value || is_date_type<T>::value ||
+                   is_time_type<T>::value>> : public PrimitiveConverter<T, PyConverter> {
  public:
-  using BuilderType = typename TypeTraits<Type>::BuilderType;
-
-  inline Status AutoChunk(Py_ssize_t size) {
-    // did we reach the builder size limit?
-    if (ARROW_PREDICT_FALSE(this->typed_builder_->value_data_length() + size >
-                            BuilderType::memory_limit())) {
-      // builder would be full, so need to add a new chunk
-      std::shared_ptr<Array> chunk;
-      RETURN_NOT_OK(this->typed_builder_->Finish(&chunk));
-      this->chunks_.emplace_back(std::move(chunk));
+  Status Append(PyObject* value) override {
+    // Since the required space has been already allocated in the Extend functions we can
+    // rely on the Unsafe builder API which improves the performance.
+    if (PyValue::IsNull(this->options_, value)) {
+      this->primitive_builder_->UnsafeAppendNull();
+    } else {
+      ARROW_ASSIGN_OR_RAISE(
+          auto converted, PyValue::Convert(this->primitive_type_, this->options_, value));
+      this->primitive_builder_->UnsafeAppend(converted);
     }
     return Status::OK();
   }
+};
 
-  Status AppendString(const PyBytesView& view) {
-    // check that the value fits in the datatype
-    if (view.size > BuilderType::memory_limit()) {
-      return Status::Invalid("string too large for datatype");
+template <typename T>
+class PyPrimitiveConverter<
+    T, enable_if_t<is_timestamp_type<T>::value || is_duration_type<T>::value>>
+    : public PrimitiveConverter<T, PyConverter> {
+ public:
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      this->primitive_builder_->UnsafeAppendNull();
+    } else {
+      ARROW_ASSIGN_OR_RAISE(
+          auto converted, PyValue::Convert(this->primitive_type_, this->options_, value));
+      // Numpy NaT sentinels can be checked after the conversion
+      if (PyArray_CheckAnyScalarExact(value) &&
+          PyValue::IsNaT(this->primitive_type_, converted)) {
+        this->primitive_builder_->UnsafeAppendNull();
+      } else {
+        this->primitive_builder_->UnsafeAppend(converted);
+      }
     }
-    DCHECK_GE(view.size, 0);
-
-    // create a new chunk if the value would overflow the builder
-    RETURN_NOT_OK(AutoChunk(view.size));
+    return Status::OK();
+  }
+};
 
-    // now we can safely append the value to the builder
-    RETURN_NOT_OK(
-        this->typed_builder_->Append(::arrow::util::string_view(view.bytes, view.size)));
+template <typename T>
+class PyPrimitiveConverter<T, enable_if_binary<T>>
+    : public PrimitiveConverter<T, PyConverter> {
+ public:
+  using OffsetType = typename T::offset_type;
 
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      this->primitive_builder_->UnsafeAppendNull();
+    } else {
+      ARROW_RETURN_NOT_OK(
+          PyValue::Convert(this->primitive_type_, this->options_, value, view_));
+      // Since we don't know the varying length input size in advance, we need to
+      // reserve space in the value builder one by one. ReserveData raises CapacityError
+      // if the value would not fit into the array.
+      ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
+      this->primitive_builder_->UnsafeAppend(view_.bytes,
+                                             static_cast<OffsetType>(view_.size));
+    }
     return Status::OK();
   }
 
  protected:
   // Create a single instance of PyBytesView here to prevent unnecessary object
-  // creation/destruction
-  PyBytesView string_view_;
+  // creation/destruction. This significantly improves the conversion performance.
+  PyBytesView view_;
 };
 
-template <typename Type, NullCoding null_coding>
-class BinaryConverter : public BinaryLikeConverter<Type, null_coding> {
- public:
-  Status AppendValue(PyObject* obj) override {
-    ARROW_ASSIGN_OR_RAISE(auto view, ValueConverter<Type>::FromPython(obj));
-    return this->AppendString(view);
-  }
-};
-
-template <NullCoding null_coding>
-class FixedSizeBinaryConverter
-    : public BinaryLikeConverter<FixedSizeBinaryType, null_coding> {
+template <typename T>
+class PyPrimitiveConverter<T, enable_if_t<std::is_same<T, FixedSizeBinaryType>::value>>
+    : public PrimitiveConverter<T, PyConverter> {
  public:
-  explicit FixedSizeBinaryConverter(int32_t byte_width) : byte_width_(byte_width) {}
-
-  Status AppendValue(PyObject* obj) override {
-    ARROW_ASSIGN_OR_RAISE(
-        this->string_view_,
-        ValueConverter<FixedSizeBinaryType>::FromPython(obj, byte_width_));
-    return this->AppendString(this->string_view_);
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      this->primitive_builder_->UnsafeAppendNull();
+    } else {
+      ARROW_RETURN_NOT_OK(
+          PyValue::Convert(this->primitive_type_, this->options_, value, view_));
+      ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
+      this->primitive_builder_->UnsafeAppend(view_.bytes);
+    }
+    return Status::OK();
   }
 
  protected:
-  int32_t byte_width_;
+  PyBytesView view_;
 };
 
-// For String/UTF8, if strict_conversions enabled, we reject any non-UTF8,
-// otherwise we allow but return results as BinaryArray
-template <typename Type, bool Strict, NullCoding null_coding>
-class StringConverter : public BinaryLikeConverter<Type, null_coding> {
+template <typename T>
+class PyPrimitiveConverter<T, enable_if_string_like<T>>
+    : public PrimitiveConverter<T, PyConverter> {
  public:
-  StringConverter() : binary_count_(0) {}
+  using OffsetType = typename T::offset_type;
 
-  Status AppendValue(PyObject* obj) override {
-    if (Strict) {
-      // raise if the object is not unicode or not an utf-8 encoded bytes
-      ARROW_ASSIGN_OR_RAISE(this->string_view_, ValueConverter<Type>::FromPython(obj));
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      this->primitive_builder_->UnsafeAppendNull();
     } else {
-      // keep track of whether values are unicode or bytes; if any bytes are
-      // observe, the result will be bytes
-      bool is_utf8;
-      ARROW_ASSIGN_OR_RAISE(this->string_view_,
-                            ValueConverter<Type>::FromPython(obj, &is_utf8));
-      if (!is_utf8) {
-        ++binary_count_;
+      ARROW_RETURN_NOT_OK(
+          PyValue::Convert(this->primitive_type_, this->options_, value, view_));
+      if (!view_.is_utf8) {
+        // observed binary value
+        observed_binary_ = true;
       }
+      ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size));
+      this->primitive_builder_->UnsafeAppend(view_.bytes,
+                                             static_cast<OffsetType>(view_.size));
     }
-    return this->AppendString(this->string_view_);
+    return Status::OK();
   }
 
-  Status GetResult(std::shared_ptr<ChunkedArray>* out) override {
-    RETURN_NOT_OK(SeqConverter::GetResult(out));
-
-    // If we saw any non-unicode, cast results to BinaryArray
-    if (binary_count_) {
-      // We should have bailed out earlier
-      DCHECK(!Strict);
-      auto binary_type = TypeTraits<typename Type::PhysicalType>::type_singleton();
-      return (*out)->View(binary_type).Value(out);
+  Result<std::shared_ptr<Array>> ToArray() override {
+    ARROW_ASSIGN_OR_RAISE(auto array, (PrimitiveConverter<T, PyConverter>::ToArray()));
+    if (observed_binary_) {
+      // if we saw any non-unicode, cast results to BinaryArray
+      auto binary_type = TypeTraits<typename T::PhysicalType>::type_singleton();
+      return array->View(binary_type);
+    } else {
+      return array;
     }
-    return Status::OK();
   }
 
  protected:
-  int64_t binary_count_;
+  PyBytesView view_;
+  bool observed_binary_ = false;
 };
 
-// ----------------------------------------------------------------------
-// Convert lists (NumPy arrays containing lists or ndarrays as values)
-
-// If the value type does not match the expected NumPy dtype, then fall through
-// to a slower PySequence-based path
-#define LIST_FAST_CASE(TYPE, NUMPY_TYPE, ArrowType)            \
-  case Type::TYPE: {                                           \
-    if (PyArray_DESCR(arr)->type_num != NUMPY_TYPE) {          \
-      return value_converter_->Extend(obj, value_length);      \
-    }                                                          \
-    return AppendNdarrayTypedItem<NUMPY_TYPE, ArrowType>(arr); \
-  }
-
-// Use internal::VisitSequence, fast for NPY_OBJECT but slower otherwise
-#define LIST_SLOW_CASE(TYPE)                            \
-  case Type::TYPE: {                                    \
-    return value_converter_->Extend(obj, value_length); \
+template <typename U>
+class PyDictionaryConverter<U, enable_if_has_c_type<U>>
+    : public DictionaryConverter<U, PyConverter> {
+ public:
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      return this->value_builder_->AppendNull();
+    } else {
+      ARROW_ASSIGN_OR_RAISE(auto converted,
+                            PyValue::Convert(this->value_type_, this->options_, value));
+      return this->value_builder_->Append(converted);
+    }
   }
+};
 
-// Base class for ListConverter and FixedSizeListConverter (to have both work with CRTP)
-template <typename TypeClass, NullCoding null_coding>
-class BaseListConverter : public TypedConverter<TypeClass, null_coding> {
+template <typename U>
+class PyDictionaryConverter<U, enable_if_has_string_view<U>>
+    : public DictionaryConverter<U, PyConverter> {
  public:
-  using BuilderType = typename TypeTraits<TypeClass>::BuilderType;
-
-  explicit BaseListConverter(bool from_pandas, bool strict_conversions,
-                             bool ignore_timezone)
-      : from_pandas_(from_pandas),
-        strict_conversions_(strict_conversions),
-        ignore_timezone_(ignore_timezone) {}
-
-  Status Init(ArrayBuilder* builder) override {
-    this->builder_ = builder;
-    this->typed_builder_ = checked_cast<BuilderType*>(builder);
-
-    this->value_type_ = checked_cast<const TypeClass&>(*builder->type()).value_type();
-    RETURN_NOT_OK(GetConverter(value_type_, from_pandas_, strict_conversions_,
-                               ignore_timezone_, &value_converter_));
-    return this->value_converter_->Init(this->typed_builder_->value_builder());
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      return this->value_builder_->AppendNull();
+    } else {
+      ARROW_RETURN_NOT_OK(
+          PyValue::Convert(this->value_type_, this->options_, value, view_));
+      return this->value_builder_->Append(view_.bytes, static_cast<int32_t>(view_.size));
+    }
   }
 
-  template <int NUMPY_TYPE, typename Type>
-  Status AppendNdarrayTypedItem(PyArrayObject* arr) {
-    using traits = internal::npy_traits<NUMPY_TYPE>;
-    using T = typename traits::value_type;
-    using ValueBuilderType = typename TypeTraits<Type>::BuilderType;
+ protected:
+  PyBytesView view_;
+};
+
+template <typename T>
+class PyListConverter : public ListConverter<T, PyConverter, PyConverterTrait> {
+ public:
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      return this->list_builder_->AppendNull();
+    }
 
-    const bool null_sentinels_possible =
-        // Always treat Numpy's NaT as null
-        NUMPY_TYPE == NPY_DATETIME || NUMPY_TYPE == NPY_TIMEDELTA ||
-        // Observing pandas's null sentinels
-        (from_pandas_ && traits::supports_nulls);
+    RETURN_NOT_OK(this->list_builder_->Append());
+    if (PyArray_Check(value)) {
+      RETURN_NOT_OK(AppendNdarray(value));
+    } else if (PySequence_Check(value)) {
+      RETURN_NOT_OK(AppendSequence(value));
+    } else {
+      return internal::InvalidType(
+          value, "was not a sequence or recognized null for conversion to list type");
+    }
 
-    auto child_builder = checked_cast<ValueBuilderType*>(value_converter_->builder());
+    return ValidateBuilder(this->list_type_);
+  }
 
-    // TODO(wesm): Vector append when not strided
-    Ndarray1DIndexer<T> values(arr);
-    if (null_sentinels_possible) {
-      for (int64_t i = 0; i < values.size(); ++i) {
-        if (traits::isnull(values[i])) {
-          RETURN_NOT_OK(child_builder->AppendNull());
-        } else {
-          RETURN_NOT_OK(child_builder->Append(values[i]));
-        }
-      }
+ protected:
+  Status ValidateBuilder(const MapType*) {
+    if (this->list_builder_->key_builder()->null_count() > 0) {
+      return Status::Invalid("Invalid Map: key field can not contain null values");
     } else {
-      for (int64_t i = 0; i < values.size(); ++i) {
-        RETURN_NOT_OK(child_builder->Append(values[i]));
-      }
+      return Status::OK();
     }
-    return Status::OK();
   }
 
-  Status AppendNdarrayItem(PyObject* obj) {
-    PyArrayObject* arr = reinterpret_cast<PyArrayObject*>(obj);
+  Status ValidateBuilder(const BaseListType*) { return Status::OK(); }
+
+  Status AppendSequence(PyObject* value) {
+    int64_t size = static_cast<int64_t>(PySequence_Size(value));
+    RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
+    return Extend(this->value_converter_.get(), value, size);
+  }
 
-    if (PyArray_NDIM(arr) != 1) {
+  Status AppendNdarray(PyObject* value) {
+    PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(value);
+    if (PyArray_NDIM(ndarray) != 1) {
       return Status::Invalid("Can only convert 1-dimensional array values");
     }
+    const int64_t size = PyArray_SIZE(ndarray);
+    RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size));
 
-    const int64_t value_length = PyArray_SIZE(arr);
-
-    switch (value_type_->id()) {
+    const auto value_type = this->value_converter_->builder()->type();
+    switch (value_type->id()) {
+// If the value type does not match the expected NumPy dtype, then fall through
+// to a slower PySequence-based path
+#define LIST_FAST_CASE(TYPE_ID, TYPE, NUMPY_TYPE)               \
+  case Type::TYPE_ID: {                                         \
+    if (PyArray_DESCR(ndarray)->type_num != NUMPY_TYPE) {       \
+      return Extend(this->value_converter_.get(), value, size); \
+    }                                                           \
+    return AppendNdarrayTyped<TYPE, NUMPY_TYPE>(ndarray);       \
+  }
+// Use internal::VisitSequence, fast for NPY_OBJECT but slower otherwise
+#define LIST_SLOW_CASE(TYPE_ID)                               \
+  case Type::TYPE_ID: {                                       \
+    return Extend(this->value_converter_.get(), value, size); \
+  }
       LIST_SLOW_CASE(NA)
-      LIST_FAST_CASE(UINT8, NPY_UINT8, UInt8Type)
-      LIST_FAST_CASE(INT8, NPY_INT8, Int8Type)
-      LIST_FAST_CASE(UINT16, NPY_UINT16, UInt16Type)
-      LIST_FAST_CASE(INT16, NPY_INT16, Int16Type)
-      LIST_FAST_CASE(UINT32, NPY_UINT32, UInt32Type)
-      LIST_FAST_CASE(INT32, NPY_INT32, Int32Type)
-      LIST_FAST_CASE(UINT64, NPY_UINT64, UInt64Type)
-      LIST_FAST_CASE(INT64, NPY_INT64, Int64Type)
+      LIST_FAST_CASE(UINT8, UInt8Type, NPY_UINT8)
+      LIST_FAST_CASE(INT8, Int8Type, NPY_INT8)
+      LIST_FAST_CASE(UINT16, UInt16Type, NPY_UINT16)
+      LIST_FAST_CASE(INT16, Int16Type, NPY_INT16)
+      LIST_FAST_CASE(UINT32, UInt32Type, NPY_UINT32)
+      LIST_FAST_CASE(INT32, Int32Type, NPY_INT32)
+      LIST_FAST_CASE(UINT64, UInt64Type, NPY_UINT64)
+      LIST_FAST_CASE(INT64, Int64Type, NPY_INT64)
+      LIST_FAST_CASE(HALF_FLOAT, HalfFloatType, NPY_FLOAT16)
+      LIST_FAST_CASE(FLOAT, FloatType, NPY_FLOAT)
+      LIST_FAST_CASE(DOUBLE, DoubleType, NPY_DOUBLE)
+      LIST_FAST_CASE(TIMESTAMP, TimestampType, NPY_DATETIME)
+      LIST_FAST_CASE(DURATION, DurationType, NPY_TIMEDELTA)
       LIST_SLOW_CASE(DATE32)
       LIST_SLOW_CASE(DATE64)
       LIST_SLOW_CASE(TIME32)
       LIST_SLOW_CASE(TIME64)
-      LIST_FAST_CASE(TIMESTAMP, NPY_DATETIME, TimestampType)
-      LIST_FAST_CASE(DURATION, NPY_TIMEDELTA, DurationType)
-      LIST_FAST_CASE(HALF_FLOAT, NPY_FLOAT16, HalfFloatType)
-      LIST_FAST_CASE(FLOAT, NPY_FLOAT, FloatType)
-      LIST_FAST_CASE(DOUBLE, NPY_DOUBLE, DoubleType)
       LIST_SLOW_CASE(BINARY)
       LIST_SLOW_CASE(FIXED_SIZE_BINARY)
       LIST_SLOW_CASE(STRING)
+#undef LIST_FAST_CASE
+#undef LIST_SLOW_CASE
       case Type::LIST: {
-        if (PyArray_DESCR(arr)->type_num != NPY_OBJECT) {
+        if (PyArray_DESCR(ndarray)->type_num != NPY_OBJECT) {
           return Status::Invalid(
-              "Can only convert list types from NumPy object "
-              "array input");
+              "Can only convert list types from NumPy object array input");
         }
-        return internal::VisitSequence(obj, [this](PyObject* item, bool*) {
-          return value_converter_->Append(item);
-        });
+        return Extend(this->value_converter_.get(), value, /*reserved=*/0);
       }
       default: {
-        return Status::TypeError("Unknown list item type: ", value_type_->ToString());
+        return Status::TypeError("Unknown list item type: ", value_type->ToString());
       }
     }
   }
 
-  Status AppendValue(PyObject* obj) override {
-    RETURN_NOT_OK(this->typed_builder_->Append());
-    if (PyArray_Check(obj)) {
-      return AppendNdarrayItem(obj);
-    }
-    if (!PySequence_Check(obj)) {
-      return internal::InvalidType(obj,
-                                   "was not a sequence or recognized null"
-                                   " for conversion to list type");
-    }
-    int64_t list_size = static_cast<int64_t>(PySequence_Size(obj));
-    return value_converter_->Extend(obj, list_size);
-  }
-
-  Status GetResult(std::shared_ptr<ChunkedArray>* out) override {
-    // TODO: Improved handling of chunked children
-    if (value_converter_->num_chunks() > 0) {
-      return Status::Invalid("List child type ",
-                             value_converter_->builder()->type()->ToString(),
-                             " overflowed the capacity of a single chunk");
-    }
-    return SeqConverter::GetResult(out);
-  }
-
- protected:
-  std::shared_ptr<DataType> value_type_;
-  std::unique_ptr<SeqConverter> value_converter_;
-  const bool from_pandas_;
-  const bool strict_conversions_;
-  const bool ignore_timezone_;
-};
-
-template <typename TypeClass, NullCoding null_coding>
-class ListConverter : public BaseListConverter<TypeClass, null_coding> {
- public:
-  using BASE = BaseListConverter<TypeClass, null_coding>;
-  using BASE::BASE;
-};
+  template <typename ArrowType, int NUMPY_TYPE>
+  Status AppendNdarrayTyped(PyArrayObject* ndarray) {
+    // no need to go through the conversion
+    using NumpyTrait = internal::npy_traits<NUMPY_TYPE>;
+    using NumpyType = typename NumpyTrait::value_type;
+    using ValueBuilderType = typename TypeTraits<ArrowType>::BuilderType;
 
-template <NullCoding null_coding>
-class FixedSizeListConverter : public BaseListConverter<FixedSizeListType, null_coding> {
- public:
-  using BASE = BaseListConverter<FixedSizeListType, null_coding>;
-  using BASE::BASE;
+    const bool null_sentinels_possible =
+        // Always treat Numpy's NaT as null
+        NUMPY_TYPE == NPY_DATETIME || NUMPY_TYPE == NPY_TIMEDELTA ||
+        // Observing pandas's null sentinels
+        (this->options_.from_pandas && NumpyTrait::supports_nulls);
 
-  Status Init(ArrayBuilder* builder) override {
-    RETURN_NOT_OK(BASE::Init(builder));
-    list_size_ = checked_pointer_cast<FixedSizeListType>(builder->type())->list_size();
-    return Status::OK();
-  }
+    auto value_builder =
+        checked_cast<ValueBuilderType*>(this->value_converter_->builder().get());
 
-  Status AppendValue(PyObject* obj) override {
-    // the same as BaseListConverter but with additional length checks
-    RETURN_NOT_OK(this->typed_builder_->Append());
-    if (PyArray_Check(obj)) {
-      int64_t list_size = static_cast<int64_t>(PyArray_Size(obj));
-      if (list_size != list_size_) {
-        return Status::Invalid("Length of item not correct: expected ", list_size_,
-                               " but got array of size ", list_size);
+    // TODO(wesm): Vector append when not strided
+    Ndarray1DIndexer<NumpyType> values(ndarray);
+    if (null_sentinels_possible) {
+      for (int64_t i = 0; i < values.size(); ++i) {
+        if (NumpyTrait::isnull(values[i])) {
+          RETURN_NOT_OK(value_builder->AppendNull());
+        } else {
+          RETURN_NOT_OK(value_builder->Append(values[i]));
+        }
+      }
+    } else {
+      for (int64_t i = 0; i < values.size(); ++i) {
+        RETURN_NOT_OK(value_builder->Append(values[i]));
       }
-      return this->AppendNdarrayItem(obj);
-    }
-    if (!PySequence_Check(obj)) {
-      return internal::InvalidType(obj,
-                                   "was not a sequence or recognized null"
-                                   " for conversion to list type");
-    }
-    int64_t list_size = static_cast<int64_t>(PySequence_Size(obj));
-    if (list_size != list_size_) {
-      return Status::Invalid("Length of item not correct: expected ", list_size_,
-                             " but got list of size ", list_size);
     }
-    return this->value_converter_->Extend(obj, list_size);
+    return Status::OK();
   }
-
- protected:
-  int64_t list_size_;
 };
 
-// ----------------------------------------------------------------------
-// Convert maps
-
-// Define a MapConverter as a ListConverter that uses MapBuilder.value_builder
-// to append struct of key/value pairs
-template <NullCoding null_coding>
-class MapConverter : public BaseListConverter<MapType, null_coding> {
+class PyStructConverter : public StructConverter<PyConverter, PyConverterTrait> {
  public:
-  using BASE = BaseListConverter<MapType, null_coding>;
-
-  explicit MapConverter(bool from_pandas, bool strict_conversions, bool ignore_timezone)
-      : BASE(from_pandas, strict_conversions, ignore_timezone), key_builder_(nullptr) {}
-
-  Status Append(PyObject* obj) override {
-    RETURN_NOT_OK(BASE::Append(obj));
-    return VerifyLastStructAppended();
-  }
-
-  Status Extend(PyObject* seq, int64_t size) override {
-    RETURN_NOT_OK(BASE::Extend(seq, size));
-    return VerifyLastStructAppended();
-  }
-
-  Status ExtendMasked(PyObject* seq, PyObject* mask, int64_t size) override {
-    RETURN_NOT_OK(BASE::ExtendMasked(seq, mask, size));
-    return VerifyLastStructAppended();
-  }
-
- protected:
-  Status VerifyLastStructAppended() {
-    // The struct_builder may not have field_builders initialized in constructor, so
-    // assign key_builder lazily
-    if (key_builder_ == nullptr) {
-      auto struct_builder =
-          checked_cast<StructBuilder*>(BASE::value_converter_->builder());
-      key_builder_ = struct_builder->field_builder(0);
+  Status Append(PyObject* value) override {
+    if (PyValue::IsNull(this->options_, value)) {
+      return this->struct_builder_->AppendNull();
     }
-    if (key_builder_->null_count() > 0) {
-      return Status::Invalid("Invalid Map: key field can not contain null values");
+    switch (input_kind_) {
+      case InputKind::DICT:
+        RETURN_NOT_OK(this->struct_builder_->Append());
+        return AppendDict(value);
+      case InputKind::TUPLE:
+        RETURN_NOT_OK(this->struct_builder_->Append());
+        return AppendTuple(value);
+      case InputKind::ITEMS:
+        RETURN_NOT_OK(this->struct_builder_->Append());
+        return AppendItems(value);
+      default:
+        RETURN_NOT_OK(InferInputKind(value));
+        return Append(value);
     }
-    return Status::OK();
   }
 
- private:
-  ArrayBuilder* key_builder_;
-};
-
-// ----------------------------------------------------------------------
-// Convert structs
+ protected:
+  Status Init(MemoryPool* pool) override {
+    RETURN_NOT_OK((StructConverter<PyConverter, PyConverterTrait>::Init(pool)));
 
-template <NullCoding null_coding>
-class StructConverter : public TypedConverter<StructType, null_coding> {
- public:
-  explicit StructConverter(bool from_pandas, bool strict_conversions,
-                           bool ignore_timezone)
-      : from_pandas_(from_pandas),
-        strict_conversions_(strict_conversions),
-        ignore_timezone_(ignore_timezone) {}
-
-  Status Init(ArrayBuilder* builder) override {
-    this->builder_ = builder;
-    this->typed_builder_ = checked_cast<StructBuilder*>(builder);
-    auto struct_type = checked_pointer_cast<StructType>(builder->type());
-
-    num_fields_ = this->typed_builder_->num_fields();
-    DCHECK_EQ(num_fields_, struct_type->num_fields());
-
-    field_name_bytes_list_.reset(PyList_New(num_fields_));
-    field_name_unicode_list_.reset(PyList_New(num_fields_));
+    // Store the field names as a PyObjects for dict matching
+    num_fields_ = this->struct_type_->num_fields();
+    bytes_field_names_.reset(PyList_New(num_fields_));
+    unicode_field_names_.reset(PyList_New(num_fields_));
     RETURN_IF_PYERROR();
 
-    // Initialize the child converters and field names
     for (int i = 0; i < num_fields_; i++) {
-      const std::string& field_name(struct_type->field(i)->name());
-      std::shared_ptr<DataType> field_type(struct_type->field(i)->type());
-
-      std::unique_ptr<SeqConverter> value_converter;
-      RETURN_NOT_OK(GetConverter(field_type, from_pandas_, strict_conversions_,
-                                 ignore_timezone_, &value_converter));
-      RETURN_NOT_OK(value_converter->Init(this->typed_builder_->field_builder(i)));
-      value_converters_.push_back(std::move(value_converter));
-
-      // Store the field name as a PyObject, for dict matching
-      PyObject* bytesobj =
-          PyBytes_FromStringAndSize(field_name.c_str(), field_name.size());
-      PyObject* unicodeobj =
+      const auto& field_name = this->struct_type_->field(i)->name();
+      PyObject* bytes = PyBytes_FromStringAndSize(field_name.c_str(), field_name.size());
+      PyObject* unicode =
           PyUnicode_FromStringAndSize(field_name.c_str(), field_name.size());
       RETURN_IF_PYERROR();
-      PyList_SET_ITEM(field_name_bytes_list_.obj(), i, bytesobj);
-      PyList_SET_ITEM(field_name_unicode_list_.obj(), i, unicodeobj);
+      PyList_SET_ITEM(bytes_field_names_.obj(), i, bytes);
+      PyList_SET_ITEM(unicode_field_names_.obj(), i, unicode);
     }
-
     return Status::OK();
   }
 
-  Status AppendValue(PyObject* obj) override {
-    RETURN_NOT_OK(this->typed_builder_->Append());
-    // Note heterogeneous sequences are not allowed
-    if (ARROW_PREDICT_FALSE(source_kind_ == SourceKind::UNKNOWN)) {
-      if (PyDict_Check(obj)) {
-        source_kind_ = SourceKind::DICTS;
-      } else if (PyTuple_Check(obj)) {
-        source_kind_ = SourceKind::TUPLES;
-      }
-    }
-    if (PyDict_Check(obj) && source_kind_ == SourceKind::DICTS) {
-      return AppendDictItem(obj);
-    } else if (PyTuple_Check(obj) && source_kind_ == SourceKind::TUPLES) {
-      return AppendTupleItem(obj);
+  Status InferInputKind(PyObject* value) {
+    // Infer input object's type, note that heterogeneous sequences are not allowed
+    if (PyDict_Check(value)) {
+      input_kind_ = InputKind::DICT;
+    } else if (PyTuple_Check(value)) {
+      input_kind_ = InputKind::TUPLE;
+    } else if (PySequence_Check(value)) {
+      input_kind_ = InputKind::ITEMS;
     } else {
-      return internal::InvalidType(obj,
-                                   "was not a dict, tuple, or recognized null value"
-                                   " for conversion to struct type");
+      return internal::InvalidType(value,
+                                   "was not a dict, tuple, or recognized null value "
+                                   "for conversion to struct type");
     }
+    return Status::OK();
   }
 
-  // Append a missing item
-  Status AppendNull() override { return this->typed_builder_->AppendNull(); }
+  Status InferKeyKind(PyObject* items) {
+    for (int i = 0; i < PySequence_Length(items); i++) {
+      // retrieve the key from the passed key-value pairs
+      ARROW_ASSIGN_OR_RAISE(auto pair, GetKeyValuePair(items, i));
 
- protected:
-  Status AppendDictItem(PyObject* obj) {
-    if (dict_key_kind_ == DictKeyKind::UNICODE) {
-      return AppendDictItemWithUnicodeKeys(obj);
-    }
-    if (dict_key_kind_ == DictKeyKind::BYTES) {
-      return AppendDictItemWithBytesKeys(obj);
-    }
-    for (int i = 0; i < num_fields_; i++) {
-      PyObject* nameobj = PyList_GET_ITEM(field_name_unicode_list_.obj(), i);
-      PyObject* valueobj = PyDict_GetItem(obj, nameobj);
-      if (valueobj != NULL) {
-        dict_key_kind_ = DictKeyKind::UNICODE;
-        return AppendDictItemWithUnicodeKeys(obj);
-      }
+      // check key exists between the unicode field names
+      bool do_contain = PySequence_Contains(unicode_field_names_.obj(), pair.first);
       RETURN_IF_PYERROR();
-      // Unicode key not present, perhaps bytes key is?
-      nameobj = PyList_GET_ITEM(field_name_bytes_list_.obj(), i);
-      valueobj = PyDict_GetItem(obj, nameobj);
-      if (valueobj != NULL) {
-        dict_key_kind_ = DictKeyKind::BYTES;
-        return AppendDictItemWithBytesKeys(obj);
+      if (do_contain) {
+        key_kind_ = KeyKind::UNICODE;
+        return Status::OK();
       }
+
+      // check key exists between the bytes field names
+      do_contain = PySequence_Contains(bytes_field_names_.obj(), pair.first);
       RETURN_IF_PYERROR();
-    }
-    // If we come here, it means all keys are absent
-    for (int i = 0; i < num_fields_; i++) {
-      RETURN_NOT_OK(value_converters_[i]->Append(Py_None));
+      if (do_contain) {
+        key_kind_ = KeyKind::BYTES;
+        return Status::OK();
+      }
     }
     return Status::OK();
   }
 
-  Status AppendDictItemWithBytesKeys(PyObject* obj) {
-    return AppendDictItem(obj, field_name_bytes_list_.obj());
-  }
-
-  Status AppendDictItemWithUnicodeKeys(PyObject* obj) {
-    return AppendDictItem(obj, field_name_unicode_list_.obj());
-  }
-
-  Status AppendDictItem(PyObject* obj, PyObject* field_name_list) {
-    // NOTE we're ignoring any extraneous dict items
+  Status AppendEmpty() {
     for (int i = 0; i < num_fields_; i++) {
-      PyObject* nameobj = PyList_GET_ITEM(field_name_list, i);  // borrowed
-      PyObject* valueobj = PyDict_GetItem(obj, nameobj);        // borrowed
-      if (valueobj == NULL) {
-        RETURN_IF_PYERROR();
-      }
-      RETURN_NOT_OK(value_converters_[i]->Append(valueobj ? valueobj : Py_None));
+      RETURN_NOT_OK(this->children_[i]->Append(Py_None));
     }
     return Status::OK();
   }
 
-  Status AppendTupleItem(PyObject* obj) {
-    if (PyTuple_GET_SIZE(obj) != num_fields_) {
+  Status AppendTuple(PyObject* tuple) {
+    if (!PyTuple_Check(tuple)) {
+      return internal::InvalidType(tuple, "was expecting a tuple");
+    }
+    if (PyTuple_GET_SIZE(tuple) != num_fields_) {
       return Status::Invalid("Tuple size must be equal to number of struct fields");
     }
     for (int i = 0; i < num_fields_; i++) {
-      PyObject* valueobj = PyTuple_GET_ITEM(obj, i);
-      RETURN_NOT_OK(value_converters_[i]->Append(valueobj));
+      PyObject* value = PyTuple_GET_ITEM(tuple, i);
+      RETURN_NOT_OK(this->children_[i]->Append(value));
     }
     return Status::OK();
   }
 
-  std::vector<std::unique_ptr<SeqConverter>> value_converters_;
-  OwnedRef field_name_unicode_list_;
-  OwnedRef field_name_bytes_list_;
-  int num_fields_;
-  // Whether we're converting from a sequence of dicts or tuples
-  enum class SourceKind { UNKNOWN, DICTS, TUPLES } source_kind_ = SourceKind::UNKNOWN;
-  enum class DictKeyKind {
-    UNKNOWN,
-    BYTES,
-    UNICODE
-  } dict_key_kind_ = DictKeyKind::UNKNOWN;
-  bool from_pandas_;
-  bool strict_conversions_;
-  bool ignore_timezone_;
-};
-
-template <NullCoding null_coding>
-class DecimalConverter : public TypedConverter<arrow::Decimal128Type, null_coding> {
- public:
-  using BASE = TypedConverter<arrow::Decimal128Type, null_coding>;
-
-  Status Init(ArrayBuilder* builder) override {
-    RETURN_NOT_OK(BASE::Init(builder));
-    decimal_type_ = checked_pointer_cast<DecimalType>(this->typed_builder_->type());
-    return Status::OK();
+  Status AppendDict(PyObject* dict) {
+    if (!PyDict_Check(dict)) {
+      return internal::InvalidType(dict, "was expecting a dict");
+    }
+    switch (key_kind_) {
+      case KeyKind::UNICODE:
+        return AppendDict(dict, unicode_field_names_.obj());
+      case KeyKind::BYTES:
+        return AppendDict(dict, bytes_field_names_.obj());
+      default:
+        RETURN_NOT_OK(InferKeyKind(PyDict_Items(dict)));
+        if (key_kind_ == KeyKind::UNKNOWN) {
+          // was unable to infer the type which means that all keys are absent
+          return AppendEmpty();
+        } else {
+          return AppendDict(dict);
+        }
+    }
   }
 
-  Status AppendValue(PyObject* obj) override {
-    Decimal128 value;
-    RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *decimal_type_, &value));
-    return this->typed_builder_->Append(value);
+  Status AppendItems(PyObject* items) {
+    if (!PySequence_Check(items)) {
+      return internal::InvalidType(items, "was expecting a sequence of key-value items");
+    }
+    switch (key_kind_) {
+      case KeyKind::UNICODE:
+        return AppendItems(items, unicode_field_names_.obj());
+      case KeyKind::BYTES:
+        return AppendItems(items, bytes_field_names_.obj());
+      default:
+        RETURN_NOT_OK(InferKeyKind(items));
+        if (key_kind_ == KeyKind::UNKNOWN) {
+          // was unable to infer the type which means that all keys are absent
+          return AppendEmpty();
+        } else {
+          return AppendItems(items);
+        }
+    }
   }
 
- private:
-  std::shared_ptr<DecimalType> decimal_type_;
-};
-
-#define PRIMITIVE(TYPE_ENUM, TYPE)                                                   \
-  case Type::TYPE_ENUM:                                                              \
-    *out = std::unique_ptr<SeqConverter>(new PrimitiveConverter<TYPE, null_coding>); \
-    break;
-
-#define SIMPLE_CONVERTER_CASE(TYPE_ENUM, TYPE_CLASS)                   \
-  case Type::TYPE_ENUM:                                                \
-    *out = std::unique_ptr<SeqConverter>(new TYPE_CLASS<null_coding>); \
-    break;
-
-// Dynamic constructor for sequence converters
-template <NullCoding null_coding>
-Status GetConverterFlat(const std::shared_ptr<DataType>& type, bool strict_conversions,
-                        bool ignore_timezone, std::unique_ptr<SeqConverter>* out) {
-  switch (type->id()) {
-    SIMPLE_CONVERTER_CASE(NA, NullConverter);
-    PRIMITIVE(BOOL, BooleanType);
-    PRIMITIVE(INT8, Int8Type);
-    PRIMITIVE(INT16, Int16Type);
-    PRIMITIVE(INT32, Int32Type);
-    PRIMITIVE(INT64, Int64Type);
-    PRIMITIVE(UINT8, UInt8Type);
-    PRIMITIVE(UINT16, UInt16Type);
-    PRIMITIVE(UINT32, UInt32Type);
-    PRIMITIVE(UINT64, UInt64Type);
-    PRIMITIVE(HALF_FLOAT, HalfFloatType);
-    PRIMITIVE(FLOAT, FloatType);
-    PRIMITIVE(DOUBLE, DoubleType);
-    PRIMITIVE(DATE32, Date32Type);
-    PRIMITIVE(DATE64, Date64Type);
-    SIMPLE_CONVERTER_CASE(DECIMAL, DecimalConverter);
-    case Type::BINARY:
-      *out =
-          std::unique_ptr<SeqConverter>(new BinaryConverter<BinaryType, null_coding>());
-      break;
-    case Type::LARGE_BINARY:
-      *out = std::unique_ptr<SeqConverter>(
-          new BinaryConverter<LargeBinaryType, null_coding>());
-      break;
-    case Type::FIXED_SIZE_BINARY:
-      *out = std::unique_ptr<SeqConverter>(new FixedSizeBinaryConverter<null_coding>(
-          checked_cast<const FixedSizeBinaryType&>(*type).byte_width()));
-      break;
-    case Type::STRING:
-      if (strict_conversions) {
-        *out = std::unique_ptr<SeqConverter>(
-            new StringConverter<StringType, true, null_coding>());
-      } else {
-        *out = std::unique_ptr<SeqConverter>(
-            new StringConverter<StringType, false, null_coding>());
-      }
-      break;
-    case Type::LARGE_STRING:
-      if (strict_conversions) {
-        *out = std::unique_ptr<SeqConverter>(
-            new StringConverter<LargeStringType, true, null_coding>());
-      } else {
-        *out = std::unique_ptr<SeqConverter>(
-            new StringConverter<LargeStringType, false, null_coding>());
+  Status AppendDict(PyObject* dict, PyObject* field_names) {
+    // NOTE we're ignoring any extraneous dict items
+    for (int i = 0; i < num_fields_; i++) {
+      PyObject* name = PyList_GET_ITEM(field_names, i);  // borrowed
+      PyObject* value = PyDict_GetItem(dict, name);      // borrowed
+      if (value == NULL) {
+        RETURN_IF_PYERROR();
       }
-      break;
-    case Type::TIME32: {
-      auto unit = checked_cast<const Time32Type&>(*type).unit();
-      *out = std::unique_ptr<SeqConverter>(
-          new TimeConverter<Time32Type, null_coding>(unit, ignore_timezone));
-      break;
+      RETURN_NOT_OK(this->children_[i]->Append(value ? value : Py_None));
     }
-    case Type::TIME64: {
-      auto unit = checked_cast<const Time64Type&>(*type).unit();
-      *out = std::unique_ptr<SeqConverter>(
-          new TimeConverter<Time64Type, null_coding>(unit, ignore_timezone));
-      break;
-    }
-    case Type::TIMESTAMP: {
-      auto unit = checked_cast<const TimestampType&>(*type).unit();
-      *out = std::unique_ptr<SeqConverter>(
-          new TemporalConverter<TimestampType, null_coding>(unit, ignore_timezone));
-      break;
-    }
-    case Type::DURATION: {
-      auto unit = checked_cast<const DurationType&>(*type).unit();
-      *out =
-          std::unique_ptr<SeqConverter>(new TemporalConverter<DurationType, null_coding>(
-              unit, /*ignore_timezone=*/false));
-      break;
-    }
-    default:
-      return Status::NotImplemented("Sequence converter for type ", type->ToString(),
-                                    " not implemented");
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-Status GetConverter(const std::shared_ptr<DataType>& type, bool from_pandas,
-                    bool strict_conversions, bool ignore_timezone,
-                    std::unique_ptr<SeqConverter>* out) {
-  if (from_pandas) {
-    // ARROW-842: If pandas is not installed then null checks will be less
-    // comprehensive, but that is okay.
-    internal::InitPandasStaticData();
+  Result<std::pair<PyObject*, PyObject*>> GetKeyValuePair(PyObject* seq, int index) {
+    PyObject* pair = PySequence_GetItem(seq, index);
+    RETURN_IF_PYERROR();
+    if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
+      return internal::InvalidType(pair, "was expecting tuple of (key, value) pair");
+    }
+    PyObject* key = PyTuple_GetItem(pair, 0);
+    RETURN_IF_PYERROR();
+    PyObject* value = PyTuple_GetItem(pair, 1);
+    RETURN_IF_PYERROR();
+    return std::make_pair(key, value);
   }
 
-  switch (type->id()) {
-    case Type::LIST:
-      if (from_pandas) {
-        *out = std::unique_ptr<SeqConverter>(
-            new ListConverter<ListType, NullCoding::PANDAS_SENTINELS>(
-                from_pandas, strict_conversions, ignore_timezone));
-      } else {
-        *out = std::unique_ptr<SeqConverter>(
-            new ListConverter<ListType, NullCoding::NONE_ONLY>(
-                from_pandas, strict_conversions, ignore_timezone));
-      }
-      return Status::OK();
-    case Type::LARGE_LIST:
-      if (from_pandas) {
-        *out = std::unique_ptr<SeqConverter>(
-            new ListConverter<LargeListType, NullCoding::PANDAS_SENTINELS>(
-                from_pandas, strict_conversions, ignore_timezone));
-      } else {
-        *out = std::unique_ptr<SeqConverter>(
-            new ListConverter<LargeListType, NullCoding::NONE_ONLY>(
-                from_pandas, strict_conversions, ignore_timezone));
-      }
-      return Status::OK();
-    case Type::MAP:
-      if (from_pandas) {
-        *out =
-            std::unique_ptr<SeqConverter>(new MapConverter<NullCoding::PANDAS_SENTINELS>(
-                from_pandas, strict_conversions, ignore_timezone));
-      } else {
-        *out = std::unique_ptr<SeqConverter>(new MapConverter<NullCoding::NONE_ONLY>(
-            from_pandas, strict_conversions, ignore_timezone));
-      }
-      return Status::OK();
-    case Type::FIXED_SIZE_LIST:
-      if (from_pandas) {
-        *out = std::unique_ptr<SeqConverter>(
-            new FixedSizeListConverter<NullCoding::PANDAS_SENTINELS>(
-                from_pandas, strict_conversions, ignore_timezone));
-      } else {
-        *out = std::unique_ptr<SeqConverter>(
-            new FixedSizeListConverter<NullCoding::NONE_ONLY>(
-                from_pandas, strict_conversions, ignore_timezone));
-      }
-      return Status::OK();
-    case Type::STRUCT:
-      if (from_pandas) {
-        *out = std::unique_ptr<SeqConverter>(
-            new StructConverter<NullCoding::PANDAS_SENTINELS>(
-                from_pandas, strict_conversions, ignore_timezone));
+  Status AppendItems(PyObject* items, PyObject* field_names) {
+    auto length = static_cast<int>(PySequence_Size(items));
+    RETURN_IF_PYERROR();
+
+    // append the values for the defined fields
+    for (int i = 0; i < std::min(num_fields_, length); i++) {
+      // retrieve the key-value pair
+      ARROW_ASSIGN_OR_RAISE(auto pair, GetKeyValuePair(items, i));
+
+      // validate that the key and the field name are equal
+      PyObject* name = PyList_GET_ITEM(field_names, i);
+      bool are_equal = PyObject_RichCompareBool(pair.first, name, Py_EQ);
+      RETURN_IF_PYERROR();
+
+      // finally append to the respective child builder
+      if (are_equal) {
+        RETURN_NOT_OK(this->children_[i]->Append(pair.second));
       } else {
-        *out = std::unique_ptr<SeqConverter>(new StructConverter<NullCoding::NONE_ONLY>(
-            from_pandas, strict_conversions, ignore_timezone));
+        ARROW_ASSIGN_OR_RAISE(auto key_view, PyBytesView::FromString(pair.first));
+        ARROW_ASSIGN_OR_RAISE(auto name_view, PyBytesView::FromString(name));
+        return Status::Invalid("The expected field name is `", name_view.bytes, "` but `",
+                               key_view.bytes, "` was given");
       }
-      return Status::OK();
-    default:
-      break;
-  }
-
-  if (from_pandas) {
-    RETURN_NOT_OK(GetConverterFlat<NullCoding::PANDAS_SENTINELS>(type, strict_conversions,
-                                                                 ignore_timezone, out));
-  } else {
-    RETURN_NOT_OK(GetConverterFlat<NullCoding::NONE_ONLY>(type, strict_conversions,
-                                                          ignore_timezone, out));
+    }
+    // insert null values for missing fields
+    for (int i = length; i < num_fields_; i++) {
+      RETURN_NOT_OK(this->children_[i]->AppendNull());
+    }
+    return Status::OK();
   }
-  return Status::OK();
-}
 
-// ----------------------------------------------------------------------
+  // Whether we're converting from a sequence of dicts or tuples or list of pairs
+  enum class InputKind { UNKNOWN, DICT, TUPLE, ITEMS } input_kind_ = InputKind::UNKNOWN;
+  // Whether the input dictionary keys' type is python bytes or unicode
+  enum class KeyKind { UNKNOWN, BYTES, UNICODE } key_kind_ = KeyKind::UNKNOWN;
+  // Store the field names as a PyObjects for dict matching
+  OwnedRef bytes_field_names_;
+  OwnedRef unicode_field_names_;
+  // Store the number of fields for later reuse
+  int num_fields_;
+};
 
 // Convert *obj* to a sequence if necessary
 // Fill *size* to its length.  If >= 0 on entry, *size* is an upper size
@@ -1352,64 +996,52 @@ Status ConvertToSequenceAndInferSize(PyObject* obj, PyObject** seq, int64_t* siz
   return Status::OK();
 }
 
-Status ConvertPySequence(PyObject* sequence_source, PyObject* mask,
-                         const PyConversionOptions& options,
-                         std::shared_ptr<ChunkedArray>* out) {
+Result<std::shared_ptr<ChunkedArray>> ConvertPySequence(PyObject* obj, PyObject* mask,
+                                                        PyConversionOptions options,
+                                                        MemoryPool* pool) {
   PyAcquireGIL lock;
 
   PyObject* seq;
   OwnedRef tmp_seq_nanny;
 
-  std::shared_ptr<DataType> real_type;
-
   int64_t size = options.size;
-  RETURN_NOT_OK(ConvertToSequenceAndInferSize(sequence_source, &seq, &size));
+  RETURN_NOT_OK(ConvertToSequenceAndInferSize(obj, &seq, &size));
   tmp_seq_nanny.reset(seq);
 
   // In some cases, type inference may be "loose", like strings. If the user
   // passed pa.string(), then we will error if we encounter any non-UTF8
   // value. If not, then we will allow the result to be a BinaryArray
-  bool strict_conversions = false;
-
   if (options.type == nullptr) {
-    RETURN_NOT_OK(InferArrowType(seq, mask, options.from_pandas, &real_type));
-    if (options.ignore_timezone && real_type->id() == Type::TIMESTAMP) {
-      const auto& ts_type = checked_cast<const TimestampType&>(*real_type);
-      real_type = timestamp(ts_type.unit());
-    }
+    ARROW_ASSIGN_OR_RAISE(options.type, InferArrowType(seq, mask, options.from_pandas));
+    options.strict = false;
   } else {
-    real_type = options.type;
-    strict_conversions = true;
+    options.strict = true;
   }
   DCHECK_GE(size, 0);
 
-  // Create the sequence converter, initialize with the builder
-  std::unique_ptr<SeqConverter> converter;
-  RETURN_NOT_OK(GetConverter(real_type, options.from_pandas, strict_conversions,
-                             options.ignore_timezone, &converter));
-
-  // Create ArrayBuilder for type, then pass into the SeqConverter
-  // instance. The reason this is created here rather than in GetConverter is
-  // because of nested types (child SeqConverter objects need the child
-  // builders created by MakeBuilder)
-  std::unique_ptr<ArrayBuilder> type_builder;
-  RETURN_NOT_OK(MakeBuilder(options.pool, real_type, &type_builder));
-  RETURN_NOT_OK(converter->Init(type_builder.get()));
-
-  // Convert values
-  if (mask != nullptr && mask != Py_None) {
-    RETURN_NOT_OK(converter->ExtendMasked(seq, mask, size));
+  ARROW_ASSIGN_OR_RAISE(auto converter, (MakeConverter<PyConverter, PyConverterTrait>(
+                                            options.type, options, pool)));
+  if (converter->may_overflow()) {
+    // The converter hierarchy contains binary- or list-like builders which can overflow
+    // depending on the input values. Wrap the converter with a chunker which detects
+    // the overflow and automatically creates new chunks.
+    ARROW_ASSIGN_OR_RAISE(auto chunked_converter, MakeChunker(std::move(converter)));
+    if (mask != nullptr && mask != Py_None) {
+      RETURN_NOT_OK(ExtendMasked(chunked_converter.get(), seq, mask, size));
+    } else {
+      RETURN_NOT_OK(Extend(chunked_converter.get(), seq, size));
+    }
+    return chunked_converter->ToChunkedArray();
   } else {
-    RETURN_NOT_OK(converter->Extend(seq, size));
+    // If the converter can't overflow spare the capacity error checking on the hot-path,
+    // this improves the performance roughly by ~10 for primitive types.
+    if (mask != nullptr && mask != Py_None) {
+      RETURN_NOT_OK(ExtendMasked(converter.get(), seq, mask, size));
+    } else {
+      RETURN_NOT_OK(Extend(converter.get(), seq, size));
+    }
+    return converter->ToChunkedArray();
   }
-
-  // Retrieve result. Conversion may yield one or more array values
-  return converter->GetResult(out);
-}
-
-Status ConvertPySequence(PyObject* obj, const PyConversionOptions& options,
-                         std::shared_ptr<ChunkedArray>* out) {
-  return ConvertPySequence(obj, nullptr, options, out);
 }
 
 }  // namespace py
diff --git a/cpp/src/arrow/python/python_to_arrow.h b/cpp/src/arrow/python/python_to_arrow.h
index 5108e752e8f..d167996ba8d 100644
--- a/cpp/src/arrow/python/python_to_arrow.h
+++ b/cpp/src/arrow/python/python_to_arrow.h
@@ -39,20 +39,17 @@ class Status;
 namespace py {
 
 struct PyConversionOptions {
-  PyConversionOptions() : type(NULLPTR), size(-1), pool(NULLPTR), from_pandas(false) {}
+  PyConversionOptions() = default;
 
   PyConversionOptions(const std::shared_ptr<DataType>& type, int64_t size,
                       MemoryPool* pool, bool from_pandas)
-      : type(type), size(size), pool(default_memory_pool()), from_pandas(from_pandas) {}
+      : type(type), size(size), from_pandas(from_pandas) {}
 
   // Set to null if to be inferred
   std::shared_ptr<DataType> type;
 
-  // Default is -1: infer from data
-  int64_t size;
-
-  // Memory pool to use for allocations
-  MemoryPool* pool;
+  // Default is -1, which indicates the size should the same as the input sequence
+  int64_t size = -1;
 
   bool from_pandas = false;
 
@@ -60,6 +57,8 @@ struct PyConversionOptions {
   /// timezone bugs (see ARROW-9528).  Should be removed
   /// after Arrow 2.0 release.
   bool ignore_timezone = false;
+
+  bool strict = false;
 };
 
 /// \brief Convert sequence (list, generator, NumPy array with dtype object) of
@@ -69,16 +68,13 @@ struct PyConversionOptions {
 /// values in the sequence are null (true) or not null (false). This parameter
 /// may be null
 /// \param[in] options various conversion options
-/// \param[out] out a ChunkedArray containing one or more chunks
-/// \return Status
+/// \param[in] pool MemoryPool to use for allocations
+/// \return Result ChunkedArray
 ARROW_PYTHON_EXPORT
-Status ConvertPySequence(PyObject* obj, PyObject* mask,
-                         const PyConversionOptions& options,
-                         std::shared_ptr<ChunkedArray>* out);
-
-ARROW_PYTHON_EXPORT
-Status ConvertPySequence(PyObject* obj, const PyConversionOptions& options,
-                         std::shared_ptr<ChunkedArray>* out);
+Result<std::shared_ptr<ChunkedArray>> ConvertPySequence(
+    PyObject* obj, PyObject* mask, PyConversionOptions options,
+    MemoryPool* pool = default_memory_pool());
 
 }  // namespace py
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc
index cefa97abeea..7b91c24f63c 100644
--- a/cpp/src/arrow/python/serialize.cc
+++ b/cpp/src/arrow/python/serialize.cc
@@ -37,6 +37,7 @@
 #include "arrow/ipc/writer.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
+#include "arrow/result.h"
 #include "arrow/tensor.h"
 #include "arrow/util/logging.h"
 
@@ -482,8 +483,7 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
     RETURN_NOT_OK(internal::CastSize(PyBytes_GET_SIZE(elem), &size));
     RETURN_NOT_OK(builder->AppendBytes(data, size));
   } else if (PyUnicode_Check(elem)) {
-    PyBytesView view;
-    RETURN_NOT_OK(view.FromString(elem));
+    ARROW_ASSIGN_OR_RAISE(auto view, PyBytesView::FromUnicode(elem));
     int32_t size = -1;
     RETURN_NOT_OK(internal::CastSize(view.size, &size));
     RETURN_NOT_OK(builder->AppendString(view.bytes, size));
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index 4a007dd8782..946d3bfe44f 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -428,8 +428,8 @@ struct ARROW_EXPORT DictionaryScalar : public Scalar {
 
   explicit DictionaryScalar(std::shared_ptr<DataType> type);
 
-  DictionaryScalar(ValueType value, std::shared_ptr<DataType> type)
-      : Scalar(std::move(type), true), value(std::move(value)) {}
+  DictionaryScalar(ValueType value, std::shared_ptr<DataType> type, bool is_valid = true)
+      : Scalar(std::move(type), is_valid), value(std::move(value)) {}
 
   Result<std::shared_ptr<Scalar>> GetEncodedValue() const;
 };
diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc
index 4171164899d..dc8708f689e 100644
--- a/cpp/src/arrow/scalar_test.cc
+++ b/cpp/src/arrow/scalar_test.cc
@@ -627,6 +627,8 @@ TEST(TestDictionaryScalar, Basics) {
     gamma.dictionary = dict;
 
     auto scalar_null = MakeNullScalar(ty);
+    checked_cast<DictionaryScalar&>(*scalar_null).value.dictionary = dict;
+
     auto scalar_alpha = DictionaryScalar(alpha, ty);
     auto scalar_gamma = DictionaryScalar(gamma, ty);
 
@@ -654,6 +656,12 @@ TEST(TestDictionaryScalar, Basics) {
     ASSERT_TRUE(first->Equals(scalar_gamma));
     ASSERT_TRUE(second->Equals(scalar_alpha));
     ASSERT_TRUE(last->Equals(scalar_null));
+
+    auto first_dict_scalar = checked_cast<const DictionaryScalar&>(*first);
+    ASSERT_TRUE(first_dict_scalar.value.dictionary->Equals(arr.dictionary()));
+
+    auto second_dict_scalar = checked_cast<const DictionaryScalar&>(*second);
+    ASSERT_TRUE(second_dict_scalar.value.dictionary->Equals(arr.dictionary()));
   }
 }
 
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index 6592af39557..31f1c1bc7f8 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -254,7 +254,7 @@ ARROW_TESTING_EXPORT void TestInitialized(const Array& array);
 
 template <typename BuilderType>
 void FinishAndCheckPadding(BuilderType* builder, std::shared_ptr<Array>* out) {
-  ASSERT_OK(builder->Finish(out));
+  ASSERT_OK_AND_ASSIGN(*out, builder->Finish());
   AssertZeroPadded(**out);
   TestInitialized(**out);
 }
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index f95edb0c896..d2abe573cd5 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -530,12 +530,20 @@ using enable_if_base_binary = enable_if_t<is_base_binary_type<T>::value, R>;
 
 // Any binary excludes string from Base binary
 template <typename T>
-using is_any_binary_type =
+using is_binary_type =
     std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
                                      std::is_same<LargeBinaryType, T>::value>;
 
 template <typename T, typename R = void>
-using enable_if_any_binary = enable_if_t<is_any_binary_type<T>::value, R>;
+using enable_if_binary = enable_if_t<is_binary_type<T>::value, R>;
+
+template <typename T>
+using is_string_type =
+    std::integral_constant<bool, std::is_same<StringType, T>::value ||
+                                     std::is_same<LargeStringType, T>::value>;
+
+template <typename T, typename R = void>
+using enable_if_string = enable_if_t<is_string_type<T>::value, R>;
 
 template <typename T>
 using is_string_like_type =
@@ -544,6 +552,9 @@ using is_string_like_type =
 template <typename T, typename R = void>
 using enable_if_string_like = enable_if_t<is_string_like_type<T>::value, R>;
 
+template <typename T, typename U, typename R = void>
+using enable_if_same = enable_if_t<std::is_same<T, U>::value, R>;
+
 // Note that this also includes DecimalType
 template <typename T>
 using is_fixed_size_binary_type = std::is_base_of<FixedSizeBinaryType, T>;
@@ -574,6 +585,9 @@ using is_nested_type = std::is_base_of<NestedType, T>;
 template <typename T, typename R = void>
 using enable_if_nested = enable_if_t<is_nested_type<T>::value, R>;
 
+template <typename T, typename R = void>
+using enable_if_not_nested = enable_if_t<!is_nested_type<T>::value, R>;
+
 template <typename T>
 using is_var_length_list_type =
     std::integral_constant<bool, std::is_base_of<LargeListType, T>::value ||
@@ -596,6 +610,15 @@ using is_fixed_size_list_type = std::is_same<FixedSizeListType, T>;
 template <typename T, typename R = void>
 using enable_if_fixed_size_list = enable_if_t<is_fixed_size_list_type<T>::value, R>;
 
+template <typename T>
+using is_list_type =
+    std::integral_constant<bool, std::is_same<T, ListType>::value ||
+                                     std::is_same<T, LargeListType>::value ||
+                                     std::is_same<T, FixedSizeListType>::valuae>;
+
+template <typename T, typename R = void>
+using enable_if_list_type = enable_if_t<is_list_type<T>::value, R>;
+
 template <typename T>
 using is_list_like_type =
     std::integral_constant<bool, is_base_list_type<T>::value ||
@@ -654,6 +677,18 @@ using is_interval_type = std::is_base_of<IntervalType, T>;
 template <typename T, typename R = void>
 using enable_if_interval = enable_if_t<is_interval_type<T>::value, R>;
 
+template <typename T>
+using is_dictionary_type = std::is_base_of<DictionaryType, T>;
+
+template <typename T, typename R = void>
+using enable_if_dictionary = enable_if_t<is_dictionary_type<T>::value, R>;
+
+template <typename T>
+using is_extension_type = std::is_base_of<ExtensionType, T>;
+
+template <typename T, typename R = void>
+using enable_if_extension = enable_if_t<is_extension_type<T>::value, R>;
+
 // Attribute differentiation
 
 template <typename T>
@@ -670,8 +705,12 @@ template <typename T, typename R = void>
 using enable_if_has_c_type = enable_if_t<has_c_type<T>::value, R>;
 
 template <typename T>
-using has_string_view = std::integral_constant<bool, is_binary_like_type<T>::value ||
-                                                         is_string_like_type<T>::value>;
+using has_string_view =
+    std::integral_constant<bool, std::is_same<BinaryType, T>::value ||
+                                     std::is_same<LargeBinaryType, T>::value ||
+                                     std::is_same<StringType, T>::value ||
+                                     std::is_same<LargeStringType, T>::value ||
+                                     std::is_same<FixedSizeBinaryType, T>::value>;
 
 template <typename T, typename R = void>
 using enable_if_has_string_view = enable_if_t<has_string_view<T>::value, R>;
diff --git a/cpp/src/arrow/util/converter.h b/cpp/src/arrow/util/converter.h
new file mode 100644
index 00000000000..d60d8d056fa
--- /dev/null
+++ b/cpp/src/arrow/util/converter.h
@@ -0,0 +1,324 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/builder.h"
+#include "arrow/chunked_array.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+namespace internal {
+
+template <typename BaseConverter, template <typename...> class ConverterTrait>
+static Result<std::unique_ptr<BaseConverter>> MakeConverter(
+    std::shared_ptr<DataType> type, typename BaseConverter::OptionsType options,
+    MemoryPool* pool);
+
+template <typename Input, typename Options>
+class Converter {
+ public:
+  using Self = Converter<Input, Options>;
+  using InputType = Input;
+  using OptionsType = Options;
+
+  virtual ~Converter() = default;
+
+  Status Construct(std::shared_ptr<DataType> type, OptionsType options,
+                   MemoryPool* pool) {
+    type_ = std::move(type);
+    options_ = std::move(options);
+    return Init(pool);
+  }
+
+  virtual Status Append(InputType value) = 0;
+
+  const std::shared_ptr<ArrayBuilder>& builder() const { return builder_; }
+
+  const std::shared_ptr<DataType>& type() const { return type_; }
+
+  OptionsType options() const { return options_; }
+
+  bool may_overflow() const { return may_overflow_; }
+
+  virtual Status Reserve(int64_t additional_capacity) {
+    return builder_->Reserve(additional_capacity);
+  }
+
+  Status AppendNull() { return builder_->AppendNull(); }
+
+  virtual Result<std::shared_ptr<Array>> ToArray() { return builder_->Finish(); }
+
+  virtual Result<std::shared_ptr<Array>> ToArray(int64_t length) {
+    ARROW_ASSIGN_OR_RAISE(auto arr, this->ToArray());
+    return arr->Slice(0, length);
+  }
+
+  virtual Result<std::shared_ptr<ChunkedArray>> ToChunkedArray() {
+    ARROW_ASSIGN_OR_RAISE(auto array, ToArray());
+    std::vector<std::shared_ptr<Array>> chunks = {std::move(array)};
+    return std::make_shared<ChunkedArray>(chunks);
+  }
+
+ protected:
+  virtual Status Init(MemoryPool* pool) { return Status::OK(); }
+
+  std::shared_ptr<DataType> type_;
+  std::shared_ptr<ArrayBuilder> builder_;
+  OptionsType options_;
+  bool may_overflow_ = false;
+};
+
+template <typename ArrowType, typename BaseConverter>
+class PrimitiveConverter : public BaseConverter {
+ public:
+  using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
+
+ protected:
+  Status Init(MemoryPool* pool) override {
+    this->builder_ = std::make_shared<BuilderType>(this->type_, pool);
+    this->may_overflow_ =
+        is_base_binary_like(this->type_->id()) || is_fixed_size_binary(this->type_->id());
+    primitive_type_ = checked_cast<const ArrowType*>(this->type_.get());
+    primitive_builder_ = checked_cast<BuilderType*>(this->builder_.get());
+    return Status::OK();
+  }
+
+  const ArrowType* primitive_type_;
+  BuilderType* primitive_builder_;
+};
+
+template <typename ArrowType, typename BaseConverter,
+          template <typename...> class ConverterTrait>
+class ListConverter : public BaseConverter {
+ public:
+  using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
+  using ConverterType = typename ConverterTrait<ArrowType>::type;
+
+ protected:
+  Status Init(MemoryPool* pool) override {
+    list_type_ = checked_cast<const ArrowType*>(this->type_.get());
+    ARROW_ASSIGN_OR_RAISE(value_converter_,
+                          (MakeConverter<BaseConverter, ConverterTrait>(
+                              list_type_->value_type(), this->options_, pool)));
+    this->builder_ =
+        std::make_shared<BuilderType>(pool, value_converter_->builder(), this->type_);
+    list_builder_ = checked_cast<BuilderType*>(this->builder_.get());
+    this->may_overflow_ = true;
+    return Status::OK();
+  }
+
+  const ArrowType* list_type_;
+  BuilderType* list_builder_;
+  std::unique_ptr<BaseConverter> value_converter_;
+};
+
+template <typename BaseConverter, template <typename...> class ConverterTrait>
+class StructConverter : public BaseConverter {
+ public:
+  using ConverterType = typename ConverterTrait<StructType>::type;
+
+  Status Reserve(int64_t additional_capacity) override {
+    ARROW_RETURN_NOT_OK(this->builder_->Reserve(additional_capacity));
+    for (const auto& child : children_) {
+      ARROW_RETURN_NOT_OK(child->Reserve(additional_capacity));
+    }
+    return Status::OK();
+  }
+
+ protected:
+  Status Init(MemoryPool* pool) override {
+    std::unique_ptr<BaseConverter> child_converter;
+    std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
+
+    struct_type_ = checked_cast<const StructType*>(this->type_.get());
+    for (const auto& field : struct_type_->fields()) {
+      ARROW_ASSIGN_OR_RAISE(child_converter,
+                            (MakeConverter<BaseConverter, ConverterTrait>(
+                                field->type(), this->options_, pool)));
+      this->may_overflow_ |= child_converter->may_overflow();
+      child_builders.push_back(child_converter->builder());
+      children_.push_back(std::move(child_converter));
+    }
+
+    this->builder_ =
+        std::make_shared<StructBuilder>(this->type_, pool, std::move(child_builders));
+    struct_builder_ = checked_cast<StructBuilder*>(this->builder_.get());
+
+    return Status::OK();
+  }
+
+  const StructType* struct_type_;
+  StructBuilder* struct_builder_;
+  std::vector<std::unique_ptr<BaseConverter>> children_;
+};
+
+template <typename ValueType, typename BaseConverter>
+class DictionaryConverter : public BaseConverter {
+ public:
+  using BuilderType = DictionaryBuilder<ValueType>;
+
+ protected:
+  Status Init(MemoryPool* pool) override {
+    std::unique_ptr<ArrayBuilder> builder;
+    ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, this->type_, NULLPTR, &builder));
+    this->builder_ = std::move(builder);
+    this->may_overflow_ = false;
+    dict_type_ = checked_cast<const DictionaryType*>(this->type_.get());
+    value_type_ = checked_cast<const ValueType*>(dict_type_->value_type().get());
+    value_builder_ = checked_cast<BuilderType*>(this->builder_.get());
+    return Status::OK();
+  }
+
+  const DictionaryType* dict_type_;
+  const ValueType* value_type_;
+  BuilderType* value_builder_;
+};
+
+template <typename BaseConverter, template <typename...> class ConverterTrait>
+struct MakeConverterImpl {
+  template <typename T, typename ConverterType = typename ConverterTrait<T>::type>
+  Status Visit(const T&) {
+    out.reset(new ConverterType());
+    return out->Construct(std::move(type), std::move(options), pool);
+  }
+
+  Status Visit(const DictionaryType& t) {
+    switch (t.value_type()->id()) {
+#define DICTIONARY_CASE(TYPE)                                                       \
+  case TYPE::type_id:                                                               \
+    out = internal::make_unique<                                                    \
+        typename ConverterTrait<DictionaryType>::template dictionary_type<TYPE>>(); \
+    break;
+      DICTIONARY_CASE(BooleanType);
+      DICTIONARY_CASE(Int8Type);
+      DICTIONARY_CASE(Int16Type);
+      DICTIONARY_CASE(Int32Type);
+      DICTIONARY_CASE(Int64Type);
+      DICTIONARY_CASE(UInt8Type);
+      DICTIONARY_CASE(UInt16Type);
+      DICTIONARY_CASE(UInt32Type);
+      DICTIONARY_CASE(UInt64Type);
+      DICTIONARY_CASE(FloatType);
+      DICTIONARY_CASE(DoubleType);
+      DICTIONARY_CASE(BinaryType);
+      DICTIONARY_CASE(StringType);
+      DICTIONARY_CASE(FixedSizeBinaryType);
+#undef DICTIONARY_CASE
+      default:
+        return Status::NotImplemented("DictionaryArray converter for type ", t.ToString(),
+                                      " not implemented");
+    }
+    return out->Construct(std::move(type), std::move(options), pool);
+  }
+
+  Status Visit(const DataType& t) { return Status::NotImplemented(t.name()); }
+
+  std::shared_ptr<DataType> type;
+  typename BaseConverter::OptionsType options;
+  MemoryPool* pool;
+  std::unique_ptr<BaseConverter> out;
+};
+
+template <typename BaseConverter, template <typename...> class ConverterTrait>
+static Result<std::unique_ptr<BaseConverter>> MakeConverter(
+    std::shared_ptr<DataType> type, typename BaseConverter::OptionsType options,
+    MemoryPool* pool) {
+  MakeConverterImpl<BaseConverter, ConverterTrait> visitor{
+      std::move(type), std::move(options), pool, NULLPTR};
+  ARROW_RETURN_NOT_OK(VisitTypeInline(*visitor.type, &visitor));
+  return std::move(visitor.out);
+}
+
+template <typename Converter>
+class Chunker {
+ public:
+  using InputType = typename Converter::InputType;
+
+  explicit Chunker(std::unique_ptr<Converter> converter)
+      : converter_(std::move(converter)) {}
+
+  Status Reserve(int64_t additional_capacity) {
+    ARROW_RETURN_NOT_OK(converter_->Reserve(additional_capacity));
+    reserved_ += additional_capacity;
+    return Status::OK();
+  }
+
+  Status AppendNull() {
+    auto status = converter_->AppendNull();
+    if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
+      ARROW_RETURN_NOT_OK(FinishChunk());
+      return converter_->AppendNull();
+    }
+    ++length_;
+    return status;
+  }
+
+  Status Append(InputType value) {
+    auto status = converter_->Append(value);
+    if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
+      ARROW_RETURN_NOT_OK(FinishChunk());
+      return Append(value);
+    }
+    ++length_;
+    return status;
+  }
+
+  Status FinishChunk() {
+    ARROW_ASSIGN_OR_RAISE(auto chunk, converter_->ToArray(length_));
+    chunks_.push_back(chunk);
+    // reserve space for the remaining items, besides being an optimization it is also
+    // required if the converter's implementation relies on unsafe builder methods in
+    // conveter->Append()
+    auto remaining = reserved_ - length_;
+    Reset();
+    return Reserve(remaining);
+  }
+
+  Result<std::shared_ptr<ChunkedArray>> ToChunkedArray() {
+    ARROW_RETURN_NOT_OK(FinishChunk());
+    return std::make_shared<ChunkedArray>(chunks_);
+  }
+
+ protected:
+  void Reset() {
+    converter_->builder()->Reset();
+    length_ = 0;
+    reserved_ = 0;
+  }
+
+  int64_t length_ = 0;
+  int64_t reserved_ = 0;
+  std::unique_ptr<Converter> converter_;
+  std::vector<std::shared_ptr<Array>> chunks_;
+};
+
+template <typename T>
+static Result<std::unique_ptr<Chunker<T>>> MakeChunker(std::unique_ptr<T> converter) {
+  return internal::make_unique<Chunker<T>>(std::move(converter));
+}
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h
index 639421cefe9..f1c4b1e6318 100644
--- a/cpp/src/arrow/util/hashing.h
+++ b/cpp/src/arrow/util/hashing.h
@@ -842,6 +842,11 @@ struct HashTraits<T, enable_if_t<has_string_view<T>::value &&
   using MemoTableType = BinaryMemoTable<BinaryBuilder>;
 };
 
+template <>
+struct HashTraits<Decimal128Type> {
+  using MemoTableType = BinaryMemoTable<BinaryBuilder>;
+};
+
 template <typename T>
 struct HashTraits<T, enable_if_t<std::is_base_of<LargeBinaryType, T>::value>> {
   using MemoTableType = BinaryMemoTable<LargeBinaryBuilder>;
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index a9f1be221f9..c5c06cec031 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -21,8 +21,10 @@ import warnings
 
 cdef _sequence_to_array(object sequence, object mask, object size,
                         DataType type, CMemoryPool* pool, c_bool from_pandas):
-    cdef int64_t c_size
-    cdef PyConversionOptions options
+    cdef:
+        int64_t c_size
+        PyConversionOptions options
+        shared_ptr[CChunkedArray] chunked
 
     if type is not None:
         options.type = type.sp_type
@@ -30,19 +32,18 @@ cdef _sequence_to_array(object sequence, object mask, object size,
     if size is not None:
         options.size = size
 
-    options.pool = pool
     options.from_pandas = from_pandas
     options.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
 
-    cdef shared_ptr[CChunkedArray] out
-
     with nogil:
-        check_status(ConvertPySequence(sequence, mask, options, &out))
+        chunked = GetResultValue(
+            ConvertPySequence(sequence, mask, options, pool)
+        )
 
-    if out.get().num_chunks() == 1:
-        return pyarrow_wrap_array(out.get().chunk(0))
+    if chunked.get().num_chunks() == 1:
+        return pyarrow_wrap_array(chunked.get().chunk(0))
     else:
-        return pyarrow_wrap_chunked_array(out)
+        return pyarrow_wrap_chunked_array(chunked)
 
 
 cdef inline _is_array_like(obj):
@@ -158,28 +159,52 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
     Notes
     -----
     Localized timestamps will currently be returned as UTC (pandas's native
-    representation).  Timezone-naive data will be implicitly interpreted as
+    representation). Timezone-naive data will be implicitly interpreted as
     UTC.
 
+    Converting to dictionary array will promote to a wider integer type for
+    indices if the number of distinct values cannot be represented, even if
+    the index type was explicitly set. This means that if there are more than
+    127 values the returned dictionary array's index type will be at least
+    pa.int16() even if pa.int8() was passed to the function. Note that an
+    explicit index type will not be demoted even if it is wider than required.
+
     Examples
     --------
     >>> import pandas as pd
     >>> import pyarrow as pa
     >>> pa.array(pd.Series([1, 2]))
-    <pyarrow.array.Int64Array object at 0x7f674e4c0e10>
+    <pyarrow.lib.Int64Array object at 0x7f674e4c0e10>
     [
       1,
       2
     ]
 
+    >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string()))
+    <pyarrow.lib.DictionaryArray object at 0x7feb288d9040>
+    -- dictionary:
+    [
+      "a",
+      "b"
+    ]
+    -- indices:
+    [
+      0,
+      1,
+      0
+    ]
+
     >>> import numpy as np
-    >>> pa.array(pd.Series([1, 2]), np.array([0, 1],
-    ... dtype=bool))
-    <pyarrow.array.Int64Array object at 0x7f9019e11208>
+    >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool))
+    <pyarrow.lib.Int64Array object at 0x7f9019e11208>
     [
       1,
       null
     ]
+
+    >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64()))
+    >>> arr.type.index_type
+    DataType(int16)
     """
     cdef:
         CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
@@ -460,7 +485,7 @@ def infer_type(values, mask=None, from_pandas=False):
     if mask is not None and not isinstance(mask, np.ndarray):
         mask = np.array(mask, dtype=bool)
 
-    check_status(InferArrowType(values, mask, use_pandas_sentinels, &out))
+    out = GetResultValue(InferArrowType(values, mask, use_pandas_sentinels))
     return pyarrow_wrap_data_type(out)
 
 
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9a3451e4f82..d1c4110cb40 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -969,12 +969,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         vector[shared_ptr[CScalar]] value
         CResult[shared_ptr[CScalar]] field(CFieldRef ref) const
 
-    cdef cppclass CDictionaryScalar" arrow::DictionaryScalar"(CScalar):
-        cppclass CDictionaryValue "arrow::DictionaryScalar::ValueType":
-            shared_ptr[CScalar] index
-            shared_ptr[CArray] dictionary
+    cdef cppclass CDictionaryScalarIndexAndDictionary \
+            "arrow::DictionaryScalar::ValueType":
+        shared_ptr[CScalar] index
+        shared_ptr[CArray] dictionary
 
-        CDictionaryValue value
+    cdef cppclass CDictionaryScalar" arrow::DictionaryScalar"(CScalar):
+        CDictionaryScalar(CDictionaryScalarIndexAndDictionary value,
+                          shared_ptr[CDataType], c_bool is_valid)
+        CDictionaryScalarIndexAndDictionary value
         CResult[shared_ptr[CScalar]] GetEncodedValue()
 
     cdef cppclass CUnionScalar" arrow::UnionScalar"(CScalar):
@@ -1746,9 +1749,8 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
 cdef extern from "arrow/python/api.h" namespace "arrow::py":
     # Requires GIL
-    CStatus InferArrowType(object obj, object mask,
-                           c_bool pandas_null_sentinels,
-                           shared_ptr[CDataType]* out_type)
+    CResult[shared_ptr[CDataType]] InferArrowType(
+        object obj, object mask, c_bool pandas_null_sentinels)
 
 
 cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
@@ -1764,12 +1766,13 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
         CMemoryPool* pool
         c_bool from_pandas
         c_bool ignore_timezone
+        c_bool strict
 
     # TODO Some functions below are not actually "nogil"
 
-    CStatus ConvertPySequence(object obj, object mask,
-                              const PyConversionOptions& options,
-                              shared_ptr[CChunkedArray]* out)
+    CResult[shared_ptr[CChunkedArray]] ConvertPySequence(
+        object obj, object mask, const PyConversionOptions& options,
+        CMemoryPool* pool)
 
     CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
 
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index cc06ca6a2f9..3e72d060d69 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 
-
 import collections
 
 
@@ -687,6 +686,50 @@ cdef class DictionaryScalar(Scalar):
     Concrete class for dictionary-encoded scalars.
     """
 
+    @classmethod
+    def _reconstruct(cls, type, is_valid, index, dictionary):
+        cdef:
+            CDictionaryScalarIndexAndDictionary value
+            shared_ptr[CDictionaryScalar] wrapped
+            DataType type_
+            Scalar index_
+            Array dictionary_
+
+        type_ = ensure_type(type, allow_none=False)
+        if not isinstance(type_, DictionaryType):
+            raise TypeError('Must pass a DictionaryType instance')
+
+        if isinstance(index, Scalar):
+            if not index.type.equals(type.index_type):
+                raise TypeError("The Scalar value passed as index must have "
+                                "identical type to the dictionary type's "
+                                "index_type")
+            index_ = index
+        else:
+            index_ = scalar(index, type=type_.index_type)
+
+        if isinstance(dictionary, Array):
+            if not dictionary.type.equals(type.value_type):
+                raise TypeError("The Array passed as dictionary must have "
+                                "identical type to the dictionary type's "
+                                "value_type")
+            dictionary_ = dictionary
+        else:
+            dictionary_ = array(dictionary, type=type_.value_type)
+
+        value.index = pyarrow_unwrap_scalar(index_)
+        value.dictionary = pyarrow_unwrap_array(dictionary_)
+
+        wrapped = make_shared[CDictionaryScalar](
+            value, pyarrow_unwrap_data_type(type_), <c_bool>(is_valid)
+        )
+        return Scalar.wrap(<shared_ptr[CScalar]> wrapped)
+
+    def __reduce__(self):
+        return DictionaryScalar._reconstruct, (
+            self.type, self.is_valid, self.index, self.dictionary
+        )
+
     @property
     def index(self):
         """
@@ -831,14 +874,15 @@ def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
         shared_ptr[CArray] array
         shared_ptr[CChunkedArray] chunked
         bint is_pandas_object = False
+        CMemoryPool* pool
 
     type = ensure_type(type, allow_none=True)
+    pool = maybe_unbox_memory_pool(memory_pool)
 
     if _is_array_like(value):
         value = get_values(value, &is_pandas_object)
 
     options.size = 1
-    options.pool = maybe_unbox_memory_pool(memory_pool)
 
     if type is not None:
         ty = ensure_type(type)
@@ -851,9 +895,12 @@ def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
 
     value = [value]
     with nogil:
-        check_status(ConvertPySequence(value, None, options, &chunked))
+        chunked = GetResultValue(ConvertPySequence(value, None, options, pool))
 
+    # get the first chunk
     assert chunked.get().num_chunks() == 1
     array = chunked.get().chunk(0)
+
+    # retrieve the scalar from the first position
     scalar = GetResultValue(array.get().GetScalar(0))
     return Scalar.wrap(scalar)
diff --git a/python/pyarrow/tests/strategies.py b/python/pyarrow/tests/strategies.py
index 088f29185bd..97e972d84d5 100644
--- a/python/pyarrow/tests/strategies.py
+++ b/python/pyarrow/tests/strategies.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import datetime
+
 import pytz
 import hypothesis as h
 import hypothesis.strategies as st
@@ -82,10 +84,16 @@
     unit=st.sampled_from(['s', 'ms', 'us', 'ns']),
     tz=tzst.timezones()
 )
-duration_types = st.sampled_from([
-    pa.duration(unit) for unit in ['s', 'ms', 'us', 'ns']])
+duration_types = st.builds(
+    pa.duration,
+    st.sampled_from(['s', 'ms', 'us', 'ns'])
+)
 temporal_types = st.one_of(
-    date_types, time_types, timestamp_types, duration_types)
+    date_types,
+    time_types,
+    timestamp_types,
+    duration_types
+)
 
 primitive_types = st.one_of(
     null_type,
@@ -101,9 +109,16 @@
 metadata = st.dictionaries(st.text(), st.text())
 
 
-def fields(type_strategy=primitive_types):
-    return st.builds(pa.field, name=custom_text, type=type_strategy,
-                     nullable=st.booleans(), metadata=metadata)
+@st.composite
+def fields(draw, type_strategy=primitive_types):
+    name = draw(custom_text)
+    typ = draw(type_strategy)
+    if pa.types.is_null(typ):
+        nullable = True
+    else:
+        nullable = draw(st.booleans())
+    meta = draw(metadata)
+    return pa.field(name, type=typ, nullable=nullable, metadata=meta)
 
 
 def list_types(item_strategy=primitive_types):
@@ -152,8 +167,10 @@ def schemas(type_strategy=primitive_types, max_fields=None):
 @st.composite
 def arrays(draw, type, size=None):
     if isinstance(type, st.SearchStrategy):
-        type = draw(type)
-    elif not isinstance(type, pa.DataType):
+        ty = draw(type)
+    elif isinstance(type, pa.DataType):
+        ty = type
+    else:
         raise TypeError('Type must be a pyarrow DataType')
 
     if isinstance(size, st.SearchStrategy):
@@ -165,57 +182,67 @@ def arrays(draw, type, size=None):
 
     shape = (size,)
 
-    if pa.types.is_list(type) or pa.types.is_large_list(type):
+    if pa.types.is_list(ty) or pa.types.is_large_list(ty):
         offsets = draw(npst.arrays(np.uint8(), shape=shape)).cumsum() // 20
         offsets = np.insert(offsets, 0, 0, axis=0)  # prepend with zero
-        values = draw(arrays(type.value_type, size=int(offsets.sum())))
-        array_type = (
-            pa.LargeListArray if pa.types.is_large_list(type)
-            else pa.ListArray)
+        values = draw(arrays(ty.value_type, size=int(offsets.sum())))
+        if pa.types.is_large_list(ty):
+            array_type = pa.LargeListArray
+        else:
+            array_type = pa.ListArray
         return array_type.from_arrays(offsets, values)
 
-    if pa.types.is_struct(type):
-        h.assume(len(type) > 0)
+    if pa.types.is_struct(ty):
+        h.assume(len(ty) > 0)
         fields, child_arrays = [], []
-        for field in type:
+        for field in ty:
             fields.append(field)
             child_arrays.append(draw(arrays(field.type, size=size)))
         return pa.StructArray.from_arrays(child_arrays, fields=fields)
 
-    if (pa.types.is_boolean(type) or pa.types.is_integer(type) or
-            pa.types.is_floating(type)):
-        values = npst.arrays(type.to_pandas_dtype(), shape=(size,))
+    if (pa.types.is_boolean(ty) or pa.types.is_integer(ty) or
+            pa.types.is_floating(ty)):
+        values = npst.arrays(ty.to_pandas_dtype(), shape=(size,))
         np_arr = draw(values)
-        if pa.types.is_floating(type):
+        if pa.types.is_floating(ty):
             # Workaround ARROW-4952: no easy way to assert array equality
             # in a NaN-tolerant way.
             np_arr[np.isnan(np_arr)] = -42.0
-        return pa.array(np_arr, type=type)
+        return pa.array(np_arr, type=ty)
 
-    if pa.types.is_null(type):
+    if pa.types.is_null(ty):
         value = st.none()
-    elif pa.types.is_time(type):
+    elif pa.types.is_time(ty):
         value = st.times()
-    elif pa.types.is_date(type):
+    elif pa.types.is_date(ty):
         value = st.dates()
-    elif pa.types.is_timestamp(type):
-        tz = pytz.timezone(type.tz) if type.tz is not None else None
-        value = st.datetimes(timezones=st.just(tz))
-    elif pa.types.is_duration(type):
+    elif pa.types.is_timestamp(ty):
+        min_int64 = -(2**63)
+        max_int64 = 2**63 - 1
+        min_datetime = datetime.datetime.fromtimestamp(min_int64 / 10**9)
+        max_datetime = datetime.datetime.fromtimestamp(max_int64 / 10**9)
+        try:
+            offset_hours = int(ty.tz)
+            tz = pytz.FixedOffset(offset_hours * 60)
+        except ValueError:
+            tz = pytz.timezone(ty.tz)
+        value = st.datetimes(timezones=st.just(tz), min_value=min_datetime,
+                             max_value=max_datetime)
+    elif pa.types.is_duration(ty):
         value = st.timedeltas()
-    elif pa.types.is_binary(type) or pa.types.is_large_binary(type):
+    elif pa.types.is_binary(ty) or pa.types.is_large_binary(ty):
         value = st.binary()
-    elif pa.types.is_string(type) or pa.types.is_large_string(type):
+    elif pa.types.is_string(ty) or pa.types.is_large_string(ty):
         value = st.text()
-    elif pa.types.is_decimal(type):
+    elif pa.types.is_decimal(ty):
         # TODO(kszucs): properly limit the precision
         # value = st.decimals(places=type.scale, allow_infinity=False)
         h.reject()
     else:
-        raise NotImplementedError(type)
+        raise NotImplementedError(ty)
 
     values = st.lists(value, min_size=size, max_size=size)
-    return pa.array(draw(values), type=type)
+    return pa.array(draw(values), type=ty)
 
 
 @st.composite
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index feb84dd1d4c..13a167fd311 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2513,9 +2513,20 @@ def test_numpy_binary_overflow_to_chunked():
 
 @pytest.mark.large_memory
 def test_list_child_overflow_to_chunked():
-    vals = [['x' * 1024]] * ((2 << 20) + 1)
-    with pytest.raises(ValueError, match="overflowed"):
-        pa.array(vals)
+    kilobyte_string = 'x' * 1024
+    two_mega = 2**21
+
+    vals = [[kilobyte_string]] * (two_mega - 1)
+    arr = pa.array(vals)
+    assert isinstance(arr, pa.Array)
+    assert len(arr) == two_mega - 1
+
+    vals = [[kilobyte_string]] * two_mega
+    arr = pa.array(vals)
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr) == two_mega
+    assert len(arr.chunk(0)) == two_mega - 1
+    assert len(arr.chunk(1)) == 1
 
 
 def test_infer_type_masked():
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index b8050f96468..f4894a4226a 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -15,19 +15,20 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import pytest
-
-from pyarrow.pandas_compat import _pandas_api  # noqa
-import pyarrow as pa
-
 import collections
 import datetime
 import decimal
 import itertools
 import math
 
+import hypothesis as h
 import numpy as np
 import pytz
+import pytest
+
+from pyarrow.pandas_compat import _pandas_api  # noqa
+import pyarrow as pa
+import pyarrow.tests.strategies as past
 
 
 int_type_pairs = [
@@ -381,7 +382,7 @@ def test_sequence_custom_integers(seq):
 @parametrize_with_iterable_types
 def test_broken_integers(seq):
     data = [MyBrokenInt()]
-    with pytest.raises(pa.ArrowInvalid):
+    with pytest.raises(pa.ArrowInvalid, match="tried to convert to int"):
         pa.array(seq(data), type=pa.int64())
 
 
@@ -887,6 +888,7 @@ def test_sequence_timestamp():
 @pytest.mark.parametrize('timezone', [
     None,
     'UTC',
+    'Etc/GMT-1',
     'Europe/Budapest',
 ])
 @pytest.mark.parametrize('unit', [
@@ -960,6 +962,38 @@ def expected_datetime_value(dt):
         assert arr[i].as_py() == expected_datetime_value(utcdata[i])
 
 
+@pytest.mark.parametrize('timezone', [
+    None,
+    'UTC',
+    'Etc/GMT-1',
+    'Europe/Budapest',
+])
+def test_pyarrow_ignore_timezone_environment_variable(monkeypatch, timezone):
+    # note that any non-empty value will evaluate to true
+    monkeypatch.setenv("PYARROW_IGNORE_TIMEZONE", "1")
+    data = [
+        datetime.datetime(2007, 7, 13, 8, 23, 34, 123456),  # naive
+        pytz.utc.localize(
+            datetime.datetime(2008, 1, 5, 5, 0, 0, 1000)
+        ),
+        pytz.timezone('US/Eastern').localize(
+            datetime.datetime(2006, 1, 13, 12, 34, 56, 432539)
+        ),
+        pytz.timezone('Europe/Moscow').localize(
+            datetime.datetime(2010, 8, 13, 5, 0, 0, 437699)
+        ),
+    ]
+
+    expected = [dt.replace(tzinfo=None) for dt in data]
+    if timezone is not None:
+        tzinfo = pytz.timezone(timezone)
+        expected = [tzinfo.fromutc(dt) for dt in expected]
+
+    ty = pa.timestamp('us', tz=timezone)
+    arr = pa.array(data, type=ty)
+    assert arr.to_pylist() == expected
+
+
 def test_sequence_timestamp_with_timezone_inference():
     data = [
         datetime.datetime(2007, 7, 13, 8, 23, 34, 123456),  # naive
@@ -1513,6 +1547,108 @@ def test_struct_from_tuples():
             pa.array([tup], type=ty)
 
 
+def test_struct_from_list_of_pairs():
+    ty = pa.struct([
+        pa.field('a', pa.int32()),
+        pa.field('b', pa.string()),
+        pa.field('c', pa.bool_())
+    ])
+    data = [
+        [('a', 5), ('b', 'foo'), ('c', True)],
+        [('a', 6), ('b', 'bar'), ('c', False)],
+        None
+    ]
+    arr = pa.array(data, type=ty)
+    assert arr.to_pylist() == [
+        {'a': 5, 'b': 'foo', 'c': True},
+        {'a': 6, 'b': 'bar', 'c': False},
+        None
+    ]
+
+    # test with duplicated field names
+    ty = pa.struct([
+        pa.field('a', pa.int32()),
+        pa.field('a', pa.string()),
+        pa.field('b', pa.bool_())
+    ])
+    data = [
+        [('a', 5), ('a', 'foo'), ('b', True)],
+        [('a', 6), ('a', 'bar'), ('b', False)],
+    ]
+    arr = pa.array(data, type=ty)
+    with pytest.raises(KeyError):
+        # TODO(kszucs): ARROW-9997
+        arr.to_pylist()
+
+    # test with empty elements
+    ty = pa.struct([
+        pa.field('a', pa.int32()),
+        pa.field('b', pa.string()),
+        pa.field('c', pa.bool_())
+    ])
+    data = [
+        [],
+        [('a', 5), ('b', 'foo'), ('c', True)],
+        [('a', 2), ('b', 'baz')],
+        [('a', 1), ('b', 'bar'), ('c', False), ('d', 'julia')],
+    ]
+    expected = [
+        {'a': None, 'b': None, 'c': None},
+        {'a': 5, 'b': 'foo', 'c': True},
+        {'a': 2, 'b': 'baz', 'c': None},
+        {'a': 1, 'b': 'bar', 'c': False},
+    ]
+    arr = pa.array(data, type=ty)
+    assert arr.to_pylist() == expected
+
+
+def test_struct_from_list_of_pairs_errors():
+    ty = pa.struct([
+        pa.field('a', pa.int32()),
+        pa.field('b', pa.string()),
+        pa.field('c', pa.bool_())
+    ])
+
+    # test that it raises if the key doesn't match the expected field name
+    data = [
+        [],
+        [('a', 5), ('c', True), ('b', None)],
+    ]
+    msg = "The expected field name is `b` but `c` was given"
+    with pytest.raises(ValueError, match=msg):
+        pa.array(data, type=ty)
+
+    # test various errors both at the first position and after because of key
+    # type inference
+    template = (
+        r"Could not convert {} with type {}: was expecting tuple of "
+        r"\(key, value\) pair"
+    )
+    cases = [
+        tuple(),  # empty key-value pair
+        tuple('a',),  # missing value
+        tuple('unknown-key',),  # not known field name
+        'string',  # not a tuple
+    ]
+    for key_value_pair in cases:
+        msg = template.format(
+            str(key_value_pair).replace('(', r'\(').replace(')', r'\)'),
+            type(key_value_pair).__name__
+        )
+
+        with pytest.raises(TypeError, match=msg):
+            pa.array([
+                [key_value_pair],
+                [('a', 5), ('b', 'foo'), ('c', None)],
+            ], type=ty)
+
+        with pytest.raises(TypeError, match=msg):
+            pa.array([
+                [('a', 5), ('b', 'foo'), ('c', None)],
+                [key_value_pair],
+            ], type=ty)
+
+
 def test_struct_from_mixed_sequence():
     # It is forbidden to mix dicts and tuples when initializing a struct array
     ty = pa.struct([pa.field('a', pa.int32()),
@@ -1530,6 +1666,7 @@ def test_struct_from_dicts_inference():
                                pa.field('c', pa.bool_())])
     data = [{'a': 5, 'b': 'foo', 'c': True},
             {'a': 6, 'b': 'bar', 'c': False}]
+
     arr = pa.array(data)
     check_struct_type(arr.type, expected_type)
     assert arr.to_pylist() == data
@@ -1543,6 +1680,7 @@ def test_struct_from_dicts_inference():
                 None,
                 {'a': None, 'b': None, 'c': None},
                 {'a': None, 'b': 'bar', 'c': None}]
+
     arr = pa.array(data)
     data_as_ndarray = np.empty(len(data), dtype=object)
     data_as_ndarray[:] = data
@@ -1561,6 +1699,7 @@ def test_struct_from_dicts_inference():
             {'a': {'aa': None, 'ab': False}, 'b': None},
             {'a': None, 'b': 'bar'}]
     arr = pa.array(data)
+
     assert arr.to_pylist() == data
 
     # Edge cases
@@ -1664,3 +1803,228 @@ def test_map_from_tuples():
     for entry in [[(5,)], [()], [('5', 'foo', True)]]:
         with pytest.raises(ValueError, match="(?i)tuple size"):
             pa.array([entry], type=pa.map_('i4', 'i4'))
+
+
+def test_dictionary_from_boolean():
+    typ = pa.dictionary(pa.int8(), value_type=pa.bool_())
+    a = pa.array([False, False, True, False, True], type=typ)
+    assert isinstance(a.type, pa.DictionaryType)
+    assert a.type.equals(typ)
+
+    expected_indices = pa.array([0, 0, 1, 0, 1], type=pa.int8())
+    expected_dictionary = pa.array([False, True], type=pa.bool_())
+    assert a.indices.equals(expected_indices)
+    assert a.dictionary.equals(expected_dictionary)
+
+
+@pytest.mark.parametrize('value_type', [
+    pa.int8(),
+    pa.int16(),
+    pa.int32(),
+    pa.int64(),
+    pa.uint8(),
+    pa.uint16(),
+    pa.uint32(),
+    pa.uint64(),
+    pa.float32(),
+    pa.float64(),
+])
+def test_dictionary_from_integers(value_type):
+    typ = pa.dictionary(pa.int8(), value_type=value_type)
+    a = pa.array([1, 2, 1, 1, 2, 3], type=typ)
+    assert isinstance(a.type, pa.DictionaryType)
+    assert a.type.equals(typ)
+
+    expected_indices = pa.array([0, 1, 0, 0, 1, 2], type=pa.int8())
+    expected_dictionary = pa.array([1, 2, 3], type=value_type)
+    assert a.indices.equals(expected_indices)
+    assert a.dictionary.equals(expected_dictionary)
+
+
+@pytest.mark.parametrize('input_index_type', [
+    pa.int8(),
+    pa.int16(),
+    pa.int32(),
+    pa.int64()
+])
+def test_dictionary_index_type(input_index_type):
+    # dictionary array is constructed using adaptive index type builder,
+    # but the input index type is considered as the minimal width type to use
+
+    typ = pa.dictionary(input_index_type, value_type=pa.int64())
+    arr = pa.array(range(10), type=typ)
+    assert arr.type.equals(typ)
+
+
+def test_dictionary_is_always_adaptive():
+    # dictionary array is constructed using adaptive index type builder,
+    # meaning that the output index type may be wider than the given index type
+    # since it depends on the input data
+    typ = pa.dictionary(pa.int8(), value_type=pa.int64())
+
+    a = pa.array(range(2**7), type=typ)
+    expected = pa.dictionary(pa.int8(), pa.int64())
+    assert a.type.equals(expected)
+
+    a = pa.array(range(2**7 + 1), type=typ)
+    expected = pa.dictionary(pa.int16(), pa.int64())
+    assert a.type.equals(expected)
+
+
+def test_dictionary_from_strings():
+    for value_type in [pa.binary(), pa.string()]:
+        typ = pa.dictionary(pa.int8(), value_type)
+        a = pa.array(["", "a", "bb", "a", "bb", "ccc"], type=typ)
+
+        assert isinstance(a.type, pa.DictionaryType)
+
+        expected_indices = pa.array([0, 1, 2, 1, 2, 3], type=pa.int8())
+        expected_dictionary = pa.array(["", "a", "bb", "ccc"], type=value_type)
+        assert a.indices.equals(expected_indices)
+        assert a.dictionary.equals(expected_dictionary)
+
+    # fixed size binary type
+    typ = pa.dictionary(pa.int8(), pa.binary(3))
+    a = pa.array(["aaa", "aaa", "bbb", "ccc", "bbb"], type=typ)
+    assert isinstance(a.type, pa.DictionaryType)
+
+    expected_indices = pa.array([0, 0, 1, 2, 1], type=pa.int8())
+    expected_dictionary = pa.array(["aaa", "bbb", "ccc"], type=pa.binary(3))
+    assert a.indices.equals(expected_indices)
+    assert a.dictionary.equals(expected_dictionary)
+
+
+def _has_unique_field_names(ty):
+    if isinstance(ty, pa.StructType):
+        field_names = [field.name for field in ty]
+        return len(set(field_names)) == len(field_names)
+    else:
+        return True
+
+
+@h.given(past.all_arrays)
+def test_array_to_pylist_roundtrip(arr):
+    # TODO(kszucs): ARROW-9997
+    h.assume(_has_unique_field_names(arr.type))
+    seq = arr.to_pylist()
+    restored = pa.array(seq, type=arr.type)
+    assert restored.equals(arr)
+
+
+@pytest.mark.large_memory
+def test_auto_chunking_binary_like():
+    # single chunk
+    v1 = b'x' * 100000000
+    v2 = b'x' * 147483646
+
+    # single chunk
+    one_chunk_data = [v1] * 20 + [b'', None, v2]
+    arr = pa.array(one_chunk_data, type=pa.binary())
+    assert isinstance(arr, pa.Array)
+    assert len(arr) == 23
+    assert arr[20].as_py() == b''
+    assert arr[21].as_py() is None
+    assert arr[22].as_py() == v2
+
+    # two chunks
+    two_chunk_data = one_chunk_data + [b'two']
+    arr = pa.array(two_chunk_data, type=pa.binary())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert arr.num_chunks == 2
+    assert len(arr.chunk(0)) == 23
+    assert len(arr.chunk(1)) == 1
+    assert arr.chunk(0)[20].as_py() == b''
+    assert arr.chunk(0)[21].as_py() is None
+    assert arr.chunk(0)[22].as_py() == v2
+    assert arr.chunk(1).to_pylist() == [b'two']
+
+    # three chunks
+    three_chunk_data = one_chunk_data * 2 + [b'three', b'three']
+    arr = pa.array(three_chunk_data, type=pa.binary())
+    assert isinstance(arr, pa.ChunkedArray)
+    assert arr.num_chunks == 3
+    assert len(arr.chunk(0)) == 23
+    assert len(arr.chunk(1)) == 23
+    assert len(arr.chunk(2)) == 2
+    for i in range(2):
+        assert arr.chunk(i)[20].as_py() == b''
+        assert arr.chunk(i)[21].as_py() is None
+        assert arr.chunk(i)[22].as_py() == v2
+    assert arr.chunk(2).to_pylist() == [b'three', b'three']
+
+
+@pytest.mark.large_memory
+def test_auto_chunking_list_of_binary():
+    # ARROW-6281
+    vals = [['x' * 1024]] * ((2 << 20) + 1)
+    arr = pa.array(vals)
+    assert isinstance(arr, pa.ChunkedArray)
+    assert arr.num_chunks == 2
+    assert len(arr.chunk(0)) == 2**21 - 1
+    assert len(arr.chunk(1)) == 2
+    assert arr.chunk(1).to_pylist() == [['x' * 1024]] * 2
+
+
+@pytest.mark.slow
+@pytest.mark.large_memory
+def test_auto_chunking_list_like():
+    item = np.ones((2**28,), dtype='uint8')
+    data = [item] * (2**3 - 1)
+    arr = pa.array(data, type=pa.list_(pa.uint8()))
+    assert isinstance(arr, pa.Array)
+    assert len(arr) == 7
+
+    item = np.ones((2**28,), dtype='uint8')
+    data = [item] * 2**3
+    arr = pa.array(data, type=pa.list_(pa.uint8()))
+    assert isinstance(arr, pa.ChunkedArray)
+    assert arr.num_chunks == 2
+    assert len(arr.chunk(0)) == 7
+    assert len(arr.chunk(1)) == 1
+    assert arr.chunk(1)[0].as_py() == list(item)
+
+
+@pytest.mark.slow
+@pytest.mark.large_memory
+def test_auto_chunking_map_type():
+    # takes ~20 minutes locally
+    ty = pa.map_(pa.int8(), pa.int8())
+    item = [(1, 1)] * 2**28
+    data = [item] * 2**3
+    arr = pa.array(data, type=ty)
+    assert isinstance(arr, pa.ChunkedArray)
+    assert len(arr.chunk(0)) == 7
+    assert len(arr.chunk(1)) == 1
+
+
+@pytest.mark.large_memory
+@pytest.mark.parametrize(('ty', 'char'), [
+    (pa.string(), 'x'),
+    (pa.binary(), b'x'),
+])
+def test_nested_auto_chunking(ty, char):
+    v1 = char * 100000000
+    v2 = char * 147483646
+
+    struct_type = pa.struct([
+        pa.field('bool', pa.bool_()),
+        pa.field('integer', pa.int64()),
+        pa.field('string-like', ty),
+    ])
+
+    data = [{'bool': True, 'integer': 1, 'string-like': v1}] * 20
+    data.append({'bool': True, 'integer': 1, 'string-like': v2})
+    arr = pa.array(data, type=struct_type)
+    assert isinstance(arr, pa.Array)
+
+    data.append({'bool': True, 'integer': 1, 'string-like': char})
+    arr = pa.array(data, type=struct_type)
+    assert isinstance(arr, pa.ChunkedArray)
+    assert arr.num_chunks == 2
+    assert len(arr.chunk(0)) == 21
+    assert len(arr.chunk(1)) == 1
+    assert arr.chunk(1)[0].as_py() == {
+        'bool': True,
+        'integer': 1,
+        'string-like': char
+    }
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 03407521c12..5de84b30432 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1497,6 +1497,22 @@ def test_bytes_exceed_2gb(self):
         table = pa.Table.from_pandas(df)
         assert table[0].num_chunks == 2
 
+    @pytest.mark.large_memory
+    @pytest.mark.parametrize('char', ['x', b'x'])
+    def test_auto_chunking_pandas_series_of_strings(self, char):
+        # ARROW-2367
+        v1 = char * 100000000
+        v2 = char * 147483646
+
+        df = pd.DataFrame({
+            'strings': [[v1]] * 20 + [[v2]] + [[b'x']]
+        })
+        arr = pa.array(df['strings'], from_pandas=True)
+        assert isinstance(arr, pa.ChunkedArray)
+        assert arr.num_chunks == 2
+        assert len(arr.chunk(0)) == 21
+        assert len(arr.chunk(1)) == 1
+
     def test_fixed_size_bytes(self):
         values = [b'foo', None, bytearray(b'bar'), None, None, b'hey']
         df = pd.DataFrame({'strings': values})
@@ -2087,6 +2103,22 @@ def test_large_binary_list(self):
                 s, pd.Series([["aa", "bb"], None, ["cc"], []]),
                 check_names=False)
 
+    @pytest.mark.slow
+    @pytest.mark.large_memory
+    def test_auto_chunking_on_list_overflow(self):
+        # ARROW-9976
+        n = 2**24
+        df = pd.DataFrame.from_dict({
+            "a": list(np.zeros((n, 2**7), dtype='uint8')),
+            "b": range(n)
+        })
+        table = pa.Table.from_pandas(df)
+
+        column_a = table[0]
+        assert column_a.num_chunks == 2
+        assert len(column_a.chunk(0)) == 2**24 - 1
+        assert len(column_a.chunk(1)) == 1
+
 
 class TestConvertStructTypes:
     """
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 091ae38e6e4..fa48ad8b5f2 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -531,27 +531,28 @@ def test_map():
 
 
 def test_dictionary():
-    indices = [2, 1, 2, 0]
-    dictionary = ['foo', 'bar', 'baz']
+    indices = pa.array([2, None, 1, 2, 0, None])
+    dictionary = pa.array(['foo', 'bar', 'baz'])
 
     arr = pa.DictionaryArray.from_arrays(indices, dictionary)
-    expected = ['baz', 'bar', 'baz', 'foo']
+    expected = ['baz', None, 'bar', 'baz', 'foo', None]
+    assert arr.to_pylist() == expected
 
     for j, (i, v) in enumerate(zip(indices, expected)):
         s = arr[j]
 
         assert s.as_py() == v
         assert s.value.as_py() == v
-        assert s.index.as_py() == i
-        assert s.dictionary.to_pylist() == dictionary
+        assert s.index.equals(i)
+        assert s.dictionary.equals(dictionary)
 
         with pytest.warns(FutureWarning):
-            assert s.index_value.as_py() == i
+            assert s.index_value.equals(i)
         with pytest.warns(FutureWarning):
             assert s.dictionary_value.as_py() == v
 
-    with pytest.raises(pa.ArrowNotImplementedError):
-        pickle.loads(pickle.dumps(s))
+        restored = pickle.loads(pickle.dumps(s))
+        assert restored.equals(s)
 
 
 def test_union():
diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py
index c52751e91ac..9ac2f01686f 100644
--- a/python/pyarrow/tests/test_types.py
+++ b/python/pyarrow/tests/test_types.py
@@ -25,6 +25,7 @@
 import pytz
 import hypothesis as h
 import hypothesis.strategies as st
+import hypothesis.extra.pytz as tzst
 import weakref
 
 import numpy as np
@@ -258,6 +259,9 @@ def test_is_primitive():
 @pytest.mark.parametrize(('tz', 'expected'), [
     (pytz.utc, 'UTC'),
     (pytz.timezone('Europe/Paris'), 'Europe/Paris'),
+    # StaticTzInfo.tzname returns with '-09' so we need to infer the timezone's
+    # name from the tzinfo.zone attribute
+    (pytz.timezone('Etc/GMT-9'), 'Etc/GMT-9'),
     (pytz.FixedOffset(180), '+03:00'),
     (datetime.timezone.utc, '+00:00'),
     (datetime.timezone(datetime.timedelta(hours=1, minutes=30)), '+01:30')
@@ -280,6 +284,13 @@ def test_tzinfo_to_string_errors():
             pa.lib.tzinfo_to_string(tz)
 
 
+@h.given(tzst.timezones())
+def test_pytz_timezone_roundtrip(tz):
+    timezone_string = pa.lib.tzinfo_to_string(tz)
+    timezone_tzinfo = pa.lib.string_to_tzinfo(timezone_string)
+    assert timezone_tzinfo == tz
+
+
 def test_convert_custom_tzinfo_objects_to_string():
     class CorrectTimezone1(datetime.tzinfo):
         """