From 9695c78ce0a1bff9bbdd083d97d06ed3b6ff0680 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Mon, 20 Jun 2022 19:45:52 -0500 Subject: [PATCH 1/9] Various porting toward removing ValueDescr from kernel APIs More refactoring More refactoring More refactoring Checkpoint, still some ValueDescr to remove Clean up and delete some more scalar output code More refactoring More refactoring More refactoring, code cleaning more cleaning More cleaning checkpoint Get everything compiling again More refactoring exec.cc refactoring, get compiling again Handle scalar -> array span promotions Work to make all scalars more well formed Make union scalars more 'well formed' All things compiling again Fix some more stuff Fix some more tedious errors Fix some more things Fixed MapBuilder::AppendArraySlice bug checkpoint Fix more bugs C++ tests passing again, restore cumulative_sum scalar tests --- .../arrow/compute_register_example.cc | 3 +- cpp/examples/arrow/udf_example.cc | 6 +- cpp/gdb_arrow.py | 7 +- cpp/src/arrow/array/array_base.cc | 23 +- cpp/src/arrow/array/array_test.cc | 12 +- cpp/src/arrow/array/builder_base.cc | 95 ++-- cpp/src/arrow/array/builder_nested.h | 6 +- cpp/src/arrow/array/data.cc | 221 ++++++-- cpp/src/arrow/array/data.h | 21 +- cpp/src/arrow/array/util.cc | 16 +- cpp/src/arrow/compare.cc | 11 +- cpp/src/arrow/compute/api_vector.cc | 4 +- cpp/src/arrow/compute/cast.cc | 68 +-- cpp/src/arrow/compute/cast.h | 55 +- cpp/src/arrow/compute/cast_internal.h | 29 ++ cpp/src/arrow/compute/exec.cc | 337 ++++++------ cpp/src/arrow/compute/exec.h | 87 +--- cpp/src/arrow/compute/exec/aggregate.cc | 60 +-- cpp/src/arrow/compute/exec/aggregate.h | 6 +- cpp/src/arrow/compute/exec/aggregate_node.cc | 36 +- cpp/src/arrow/compute/exec/expression.cc | 119 ++--- cpp/src/arrow/compute/exec/expression.h | 15 +- .../arrow/compute/exec/expression_internal.h | 21 +- cpp/src/arrow/compute/exec/expression_test.cc | 26 +- cpp/src/arrow/compute/exec/hash_join.cc | 6 +- cpp/src/arrow/compute/exec/hash_join_dict.cc | 18 +- .../arrow/compute/exec/hash_join_node_test.cc | 14 +- cpp/src/arrow/compute/exec/plan_test.cc | 29 +- cpp/src/arrow/compute/exec/project_node.cc | 4 +- cpp/src/arrow/compute/exec/test_util.cc | 23 +- cpp/src/arrow/compute/exec/test_util.h | 8 +- cpp/src/arrow/compute/exec_internal.h | 15 +- cpp/src/arrow/compute/exec_test.cc | 48 +- cpp/src/arrow/compute/function.cc | 155 +++--- cpp/src/arrow/compute/function.h | 24 +- cpp/src/arrow/compute/function_benchmark.cc | 49 +- cpp/src/arrow/compute/function_internal.h | 10 + cpp/src/arrow/compute/function_test.cc | 22 +- cpp/src/arrow/compute/kernel.cc | 90 ++-- cpp/src/arrow/compute/kernel.h | 153 ++---- cpp/src/arrow/compute/kernel_test.cc | 284 ++++------- .../arrow/compute/kernels/aggregate_basic.cc | 84 ++- .../compute/kernels/aggregate_basic_avx2.cc | 8 +- .../compute/kernels/aggregate_basic_avx512.cc | 8 +- .../kernels/aggregate_basic_internal.h | 11 +- .../arrow/compute/kernels/aggregate_mode.cc | 12 +- .../compute/kernels/aggregate_quantile.cc | 10 +- .../compute/kernels/aggregate_tdigest.cc | 9 +- .../arrow/compute/kernels/codegen_internal.cc | 177 ++++--- .../arrow/compute/kernels/codegen_internal.h | 139 ++--- .../compute/kernels/codegen_internal_test.cc | 139 ++--- .../arrow/compute/kernels/hash_aggregate.cc | 142 +++--- .../compute/kernels/hash_aggregate_test.cc | 96 ++-- cpp/src/arrow/compute/kernels/row_encoder.cc | 25 +- cpp/src/arrow/compute/kernels/row_encoder.h | 2 +- .../compute/kernels/scalar_arithmetic.cc | 264 +++++----- 
.../arrow/compute/kernels/scalar_boolean.cc | 89 +--- .../compute/kernels/scalar_cast_dictionary.cc | 34 -- .../compute/kernels/scalar_cast_internal.cc | 142 ++---- .../compute/kernels/scalar_cast_internal.h | 17 +- .../compute/kernels/scalar_cast_nested.cc | 35 -- .../compute/kernels/scalar_cast_numeric.cc | 58 +-- .../compute/kernels/scalar_cast_string.cc | 67 +-- .../compute/kernels/scalar_cast_temporal.cc | 3 +- .../arrow/compute/kernels/scalar_cast_test.cc | 12 +- .../arrow/compute/kernels/scalar_compare.cc | 149 ++---- .../arrow/compute/kernels/scalar_if_else.cc | 338 +++++------- .../compute/kernels/scalar_if_else_test.cc | 66 ++- .../arrow/compute/kernels/scalar_nested.cc | 374 ++++---------- .../compute/kernels/scalar_nested_test.cc | 12 +- .../arrow/compute/kernels/scalar_random.cc | 4 +- .../compute/kernels/scalar_set_lookup.cc | 25 +- .../compute/kernels/scalar_string_ascii.cc | 479 +++++------------- .../compute/kernels/scalar_string_internal.h | 111 +--- .../compute/kernels/scalar_string_utf8.cc | 24 +- .../compute/kernels/scalar_temporal_unary.cc | 104 +--- .../arrow/compute/kernels/scalar_validity.cc | 212 +++----- cpp/src/arrow/compute/kernels/test_util.cc | 18 +- cpp/src/arrow/compute/kernels/test_util.h | 10 +- .../arrow/compute/kernels/util_internal.cc | 64 --- cpp/src/arrow/compute/kernels/util_internal.h | 11 - .../compute/kernels/vector_array_sort.cc | 17 +- .../compute/kernels/vector_cumulative_ops.cc | 20 +- .../kernels/vector_cumulative_ops_test.cc | 48 +- cpp/src/arrow/compute/kernels/vector_hash.cc | 37 +- .../arrow/compute/kernels/vector_nested.cc | 9 +- .../arrow/compute/kernels/vector_replace.cc | 21 +- .../arrow/compute/kernels/vector_selection.cc | 118 ++--- cpp/src/arrow/compute/row/grouper.cc | 55 +- cpp/src/arrow/compute/row/grouper.h | 2 +- cpp/src/arrow/compute/type_fwd.h | 2 +- cpp/src/arrow/dataset/partition.cc | 2 +- cpp/src/arrow/dataset/scanner.cc | 4 +- cpp/src/arrow/datum.cc | 73 --- cpp/src/arrow/datum.h | 68 --- cpp/src/arrow/datum_test.cc | 26 - cpp/src/arrow/ipc/json_simple.cc | 3 +- cpp/src/arrow/python/gdb.cc | 36 +- cpp/src/arrow/python/udf.cc | 57 +-- cpp/src/arrow/scalar.cc | 328 +++++++----- cpp/src/arrow/scalar.h | 88 ++-- cpp/src/arrow/scalar_test.cc | 180 +++++-- cpp/src/arrow/type.cc | 25 +- cpp/src/arrow/type.h | 61 ++- cpp/src/arrow/type_fwd.h | 40 +- cpp/src/arrow/type_traits.h | 13 + 106 files changed, 3008 insertions(+), 4145 deletions(-) diff --git a/cpp/examples/arrow/compute_register_example.cc b/cpp/examples/arrow/compute_register_example.cc index 13d80b29631..113dfd0faf3 100644 --- a/cpp/examples/arrow/compute_register_example.cc +++ b/cpp/examples/arrow/compute_register_example.cc @@ -127,8 +127,7 @@ const cp::FunctionDoc func_doc{ int main(int argc, char** argv) { const std::string name = "compute_register_example"; auto func = std::make_shared(name, cp::Arity::Unary(), func_doc); - cp::ScalarKernel kernel({cp::InputType::Array(arrow::int64())}, arrow::int64(), - ExampleFunctionImpl); + cp::ScalarKernel kernel({arrow::int64()}, arrow::int64(), ExampleFunctionImpl); kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE; ABORT_ON_FAILURE(func->AddKernel(std::move(kernel))); diff --git a/cpp/examples/arrow/udf_example.cc b/cpp/examples/arrow/udf_example.cc index 47c45411477..ccd804339a2 100644 --- a/cpp/examples/arrow/udf_example.cc +++ b/cpp/examples/arrow/udf_example.cc @@ -75,10 +75,8 @@ arrow::Status SampleFunction(cp::KernelContext* ctx, const cp::ExecSpan& batch, arrow::Status Execute() { const std::string name = 
"add_three"; auto func = std::make_shared(name, cp::Arity::Ternary(), func_doc); - cp::ScalarKernel kernel( - {cp::InputType::Array(arrow::int64()), cp::InputType::Array(arrow::int64()), - cp::InputType::Array(arrow::int64())}, - arrow::int64(), SampleFunction); + cp::ScalarKernel kernel({arrow::int64(), arrow::int64(), arrow::int64()}, + arrow::int64(), SampleFunction); kernel.mem_allocation = cp::MemAllocation::PREALLOCATE; kernel.null_handling = cp::NullHandling::INTERSECTION; diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py index cd687ec8b2e..2237da4cc98 100644 --- a/cpp/gdb_arrow.py +++ b/cpp/gdb_arrow.py @@ -1406,13 +1406,12 @@ class FixedSizeBinaryScalarPrinter(BaseBinaryScalarPrinter): def to_string(self): size = self.type['byte_width_'] - if not self.is_valid: - return f"{self._format_type()} of size {size}, null value" bufptr = BufferPtr(SharedPtr(self.val['value']).get()) if bufptr.data is None: return f"{self._format_type()} of size {size}, " - return (f"{self._format_type()} of size {size}, " - f"value {self._format_buf(bufptr)}") + nullness = 'non-null' if self.is_valid else 'null' + return (f"{self._format_type()} {nullness} of size {size}, " + f"value buffer {self._format_buf(bufptr)}") class DictionaryScalarPrinter(ScalarPrinter): diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index b36fb0fb94a..5d27b2aedfb 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -104,16 +104,15 @@ struct ScalarFromArraySlotImpl { } Status Visit(const SparseUnionArray& a) { - const auto type_code = a.type_code(index_); - // child array which stores the actual value - const auto arr = a.field(a.child_id(index_)); - // no need to adjust the index - ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(index_)); - if (value->is_valid) { - out_ = std::shared_ptr(new SparseUnionScalar(value, type_code, a.type())); - } else { - out_ = std::shared_ptr(new SparseUnionScalar(type_code, a.type())); + int8_t type_code = a.type_code(index_); + + ScalarVector children; + for (int i = 0; i < a.type()->num_fields(); ++i) { + children.emplace_back(); + ARROW_ASSIGN_OR_RAISE(children.back(), a.field(i)->GetScalar(index_)); } + + out_ = std::make_shared(std::move(children), type_code, a.type()); return Status::OK(); } @@ -124,11 +123,7 @@ struct ScalarFromArraySlotImpl { // need to look up the value based on offsets auto offset = a.value_offset(index_); ARROW_ASSIGN_OR_RAISE(auto value, arr->GetScalar(offset)); - if (value->is_valid) { - out_ = std::shared_ptr(new DenseUnionScalar(value, type_code, a.type())); - } else { - out_ = std::shared_ptr(new DenseUnionScalar(type_code, a.type())); - } + out_ = std::make_shared(value, type_code, a.type()); return Status::OK(); } diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 0d9afba6ece..d438557a330 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -561,16 +561,16 @@ static ScalarVector GetScalars() { }, struct_({field("min", int32()), field("max", int32())})), // Same values, different union type codes - std::make_shared(std::make_shared(100), 6, - sparse_union_ty), - std::make_shared(std::make_shared(100), 42, - sparse_union_ty), - std::make_shared(42, sparse_union_ty), + SparseUnionScalar::FromValue(std::make_shared(100), 1, + sparse_union_ty), + SparseUnionScalar::FromValue(std::make_shared(100), 2, + sparse_union_ty), + SparseUnionScalar::FromValue(MakeNullScalar(int32()), 2, sparse_union_ty), 
std::make_shared(std::make_shared(101), 6, dense_union_ty), std::make_shared(std::make_shared(101), 42, dense_union_ty), - std::make_shared(42, dense_union_ty), + std::make_shared(MakeNullScalar(int32()), 42, dense_union_ty), DictionaryScalar::Make(ScalarFromJSON(int8(), "1"), ArrayFromJSON(utf8(), R"(["foo", "bar"])")), DictionaryScalar::Make(ScalarFromJSON(uint8(), "1"), diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc index 49abd8e0234..deadef061df 100644 --- a/cpp/src/arrow/array/builder_base.cc +++ b/cpp/src/arrow/array/builder_base.cc @@ -34,6 +34,8 @@ namespace arrow { +using internal::checked_cast; + Status ArrayBuilder::CheckArrayType(const std::shared_ptr& expected_type, const Array& array, const char* message) { if (!expected_type->Equals(*array.type())) { @@ -105,14 +107,13 @@ struct AppendScalarImpl { is_fixed_size_binary_type::value, Status> Visit(const T&) { - auto builder = internal::checked_cast::BuilderType*>(builder_); + auto builder = checked_cast::BuilderType*>(builder_); RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_))); for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; raw++) { - auto scalar = - internal::checked_cast::ScalarType*>(raw->get()); + auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { builder->UnsafeAppend(scalar->value); } else { @@ -128,22 +129,20 @@ struct AppendScalarImpl { int64_t data_size = 0; for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; raw++) { - auto scalar = - internal::checked_cast::ScalarType*>(raw->get()); + auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { data_size += scalar->value->size(); } } - auto builder = internal::checked_cast::BuilderType*>(builder_); + auto builder = checked_cast::BuilderType*>(builder_); RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_))); RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size)); for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_; raw++) { - auto scalar = - internal::checked_cast::ScalarType*>(raw->get()); + auto scalar = checked_cast::ScalarType*>(raw->get()); if (scalar->is_valid) { builder->UnsafeAppend(util::string_view{*scalar->value}); } else { @@ -156,13 +155,12 @@ struct AppendScalarImpl { template enable_if_list_like Visit(const T&) { - auto builder = internal::checked_cast::BuilderType*>(builder_); + auto builder = checked_cast::BuilderType*>(builder_); int64_t num_children = 0; for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_; scalar++) { if (!(*scalar)->is_valid) continue; - num_children += - internal::checked_cast(**scalar).value->length(); + num_children += checked_cast(**scalar).value->length(); } RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_)); @@ -171,8 +169,7 @@ struct AppendScalarImpl { scalar++) { if ((*scalar)->is_valid) { RETURN_NOT_OK(builder->Append()); - const Array& list = - *internal::checked_cast(**scalar).value; + const Array& list = *checked_cast(**scalar).value; for (int64_t i = 0; i < list.length(); i++) { ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i)); RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar)); @@ -186,7 +183,7 @@ struct AppendScalarImpl { } Status Visit(const StructType& type) { - auto* builder = internal::checked_cast(builder_); + auto* builder = checked_cast(builder_); auto count = 
n_repeats_ * (scalars_end_ - scalars_begin_); RETURN_NOT_OK(builder->Reserve(count)); for (int field_index = 0; field_index < type.num_fields(); ++field_index) { @@ -194,7 +191,7 @@ struct AppendScalarImpl { } for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) { - const auto& scalar = internal::checked_cast(**s); + const auto& scalar = checked_cast(**s); for (int field_index = 0; field_index < type.num_fields(); ++field_index) { if (!scalar.is_valid || !scalar.value[field_index]) { RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull()); @@ -213,12 +210,55 @@ struct AppendScalarImpl { Status Visit(const DenseUnionType& type) { return MakeUnionArray(type); } + template ::BuilderType> + Status AppendUnionScalar(const T& type, const Scalar& s, BuilderType* builder) { + const auto& scalar = checked_cast(s); + const auto scalar_field_index = type.child_ids()[scalar.type_code]; + RETURN_NOT_OK(builder->Append(scalar.type_code)); + + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + auto* child_builder = builder->child_builder(field_index).get(); + if (field_index == scalar_field_index) { + if (scalar.is_valid) { + RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value)); + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } + } + return Status::OK(); + } + + template <> + Status AppendUnionScalar(const SparseUnionType& type, const Scalar& s, + SparseUnionBuilder* builder) { + // For each scalar, + // 1. append the type code, + // 2. append the value to the corresponding child, + // 3. append null to the other children. + const auto& scalar = checked_cast(s); + RETURN_NOT_OK(builder->Append(scalar.type_code)); + + for (int field_index = 0; field_index < type.num_fields(); ++field_index) { + auto* child_builder = builder->child_builder(field_index).get(); + if (field_index == scalar.child_id) { + if (scalar.is_valid) { + RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value[field_index])); + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } else { + RETURN_NOT_OK(child_builder->AppendNull()); + } + } + return Status::OK(); + } + template Status MakeUnionArray(const T& type) { using BuilderType = typename TypeTraits::BuilderType; - constexpr bool is_dense = std::is_same::value; - auto* builder = internal::checked_cast(builder_); + auto* builder = checked_cast(builder_); const auto count = n_repeats_ * (scalars_end_ - scalars_begin_); RETURN_NOT_OK(builder->Reserve(count)); @@ -230,26 +270,7 @@ struct AppendScalarImpl { for (int64_t i = 0; i < n_repeats_; i++) { for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) { - // For each scalar, - // 1. append the type code, - // 2. append the value to the corresponding child, - // 3. if the union is sparse, append null to the other children. 
- const auto& scalar = internal::checked_cast(**s); - const auto scalar_field_index = type.child_ids()[scalar.type_code]; - RETURN_NOT_OK(builder->Append(scalar.type_code)); - - for (int field_index = 0; field_index < type.num_fields(); ++field_index) { - auto* child_builder = builder->child_builder(field_index).get(); - if (field_index == scalar_field_index) { - if (scalar.is_valid) { - RETURN_NOT_OK(child_builder->AppendScalar(*scalar.value)); - } else { - RETURN_NOT_OK(child_builder->AppendNull()); - } - } else if (!is_dense) { - RETURN_NOT_OK(child_builder->AppendNull()); - } - } + RETURN_NOT_OK(AppendUnionScalar(type, **s, builder)); } } return Status::OK(); diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 3d36cb5f65e..306d861b09f 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -304,10 +304,12 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder { if (!validity || bit_util::GetBit(validity, array.offset + row)) { ARROW_RETURN_NOT_OK(Append()); const int64_t slot_length = offsets[row + 1] - offsets[row]; + // Add together the inner StructArray offset to the Map/List offset + int64_t key_value_offset = array.child_data[0].offset + offsets[row]; ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice( - array.child_data[0].child_data[0], offsets[row], slot_length)); + array.child_data[0].child_data[0], key_value_offset, slot_length)); ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice( - array.child_data[0].child_data[1], offsets[row], slot_length)); + array.child_data[0].child_data[1], key_value_offset, slot_length)); } else { ARROW_RETURN_NOT_OK(AppendNull()); } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 37db8ccb775..970bcaaaeb2 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -38,6 +38,7 @@ namespace arrow { +using internal::checked_cast; using internal::CountSetBits; static inline void AdjustNonNullable(Type::type type_id, int64_t length, @@ -174,27 +175,197 @@ void ArraySpan::SetMembers(const ArrayData& data) { } } +template +void SetOffsetsForScalar(ArraySpan* span, uint8_t* buffer, int64_t value_size, + int buffer_index = 1) { + auto offsets = reinterpret_cast(buffer); + offsets[0] = 0; + offsets[1] = static_cast(value_size); + span->buffers[buffer_index].data = buffer; + span->buffers[buffer_index].size = 2 * sizeof(offset_type); +} + +int GetNumBuffers(const DataType& type) { + switch (type.id()) { + case Type::NA: + case Type::STRUCT: + case Type::FIXED_SIZE_LIST: + return 1; + case Type::BINARY: + case Type::LARGE_BINARY: + case Type::STRING: + case Type::LARGE_STRING: + case Type::DENSE_UNION: + return 3; + case Type::EXTENSION: + // The number of buffers depends on the storage type + return GetNumBuffers( + *internal::checked_cast(type).storage_type()); + default: + // Everything else has 2 buffers + return 2; + } +} + +namespace internal { + +void FillZeroLengthArray(const DataType* type, ArraySpan* span) { + memset(span->scratch_space, 0x00, 16); + + span->type = type; + span->length = 0; + int num_buffers = GetNumBuffers(*type); + for (int i = 0; i < num_buffers; ++i) { + span->buffers[i].data = span->scratch_space; + span->buffers[i].size = 0; + } + + for (int i = num_buffers; i < 3; ++i) { + span->ClearBuffer(i); + } + + // Fill children + span->child_data.resize(type->num_fields()); + for (int i = 0; i < type->num_fields(); ++i) { + FillZeroLengthArray(type->field(i)->type().get(), &span->child_data[i]); + } +} + +} 
// namespace internal + void ArraySpan::FillFromScalar(const Scalar& value) { - static const uint8_t kValidByte = 0x01; - static const uint8_t kNullByte = 0x00; + static uint8_t kTrueBit = 0x01; + static uint8_t kFalseBit = 0x00; this->type = value.type.get(); this->length = 1; - // Populate null count and validity bitmap + Type::type type_id = value.type->id(); + + // Populate null count and validity bitmap (only for non-union types) this->null_count = value.is_valid ? 0 : 1; - this->buffers[0].data = const_cast(value.is_valid ? &kValidByte : &kNullByte); - this->buffers[0].size = 1; + if (!is_union(type_id)) { + this->buffers[0].data = value.is_valid ? &kTrueBit : &kFalseBit; + this->buffers[0].size = 1; + } - if (is_primitive(value.type->id())) { - const auto& scalar = - internal::checked_cast(value); + if (type_id == Type::BOOL) { + const auto& scalar = checked_cast(value); + this->buffers[1].data = scalar.value ? &kTrueBit : &kFalseBit; + this->buffers[1].size = 1; + } else if (is_primitive(type_id) || is_decimal(type_id) || + type_id == Type::DICTIONARY) { + const auto& scalar = checked_cast(value); const uint8_t* scalar_data = reinterpret_cast(scalar.view().data()); this->buffers[1].data = const_cast(scalar_data); this->buffers[1].size = scalar.type->byte_width(); + if (type_id == Type::DICTIONARY) { + // Populate dictionary data + const auto& dict_scalar = checked_cast(value); + this->child_data.resize(1); + this->child_data[0].SetMembers(*dict_scalar.value.dictionary->data()); + } + } else if (is_base_binary_like(type_id)) { + const auto& scalar = checked_cast(value); + this->buffers[1].data = this->scratch_space; + const uint8_t* data_buffer = nullptr; + int64_t data_size = 0; + if (scalar.is_valid) { + data_buffer = scalar.value->data(); + data_size = scalar.value->size(); + } + if (is_binary_like(type_id)) { + SetOffsetsForScalar(this, this->scratch_space, data_size); + } else { + // is_large_binary_like + SetOffsetsForScalar(this, this->scratch_space, data_size); + } + this->buffers[2].data = const_cast(data_buffer); + this->buffers[2].size = data_size; + } else if (type_id == Type::FIXED_SIZE_BINARY) { + const auto& scalar = checked_cast(value); + this->buffers[1].data = const_cast(scalar.value->data()); + this->buffers[1].size = scalar.value->size(); + } else if (is_list_like(type_id)) { + const auto& scalar = checked_cast(value); + + int64_t value_length = 0; + this->child_data.resize(1); + if (scalar.value != nullptr) { + // When the scalar is null, scalar.value can also be null + this->child_data[0].SetMembers(*scalar.value->data()); + value_length = scalar.value->length(); + } else { + // Even when the value is null, we still must populate the + // child_data to yield a valid array. 
Tedious + internal::FillZeroLengthArray(this->type->field(0)->type().get(), + &this->child_data[0]); + } + + if (type_id == Type::LIST || type_id == Type::MAP) { + SetOffsetsForScalar(this, this->scratch_space, value_length); + } else if (type_id == Type::LARGE_LIST) { + SetOffsetsForScalar(this, this->scratch_space, value_length); + } else { + // FIXED_SIZE_LIST: does not have a second buffer + this->buffers[1].data = nullptr; + this->buffers[1].size = 0; + } + } else if (type_id == Type::STRUCT) { + const auto& scalar = checked_cast(value); + this->child_data.resize(this->type->num_fields()); + DCHECK_EQ(this->type->num_fields(), static_cast(scalar.value.size())); + for (size_t i = 0; i < scalar.value.size(); ++i) { + this->child_data[i].FillFromScalar(*scalar.value[i]); + } + } else if (is_union(type_id)) { + // First buffer is kept null since unions have no validity vector + this->buffers[0].data = nullptr; + this->buffers[0].size = 0; + + this->buffers[1].data = this->scratch_space; + this->buffers[1].size = 1; + int8_t* type_codes = reinterpret_cast(this->scratch_space); + type_codes[0] = checked_cast(value).type_code; + + this->child_data.resize(this->type->num_fields()); + if (type_id == Type::DENSE_UNION) { + const auto& scalar = checked_cast(value); + // Has offset; start 4 bytes in so it's aligned to a 32-bit boundaries + SetOffsetsForScalar(this, this->scratch_space + sizeof(int32_t), 1, + /*buffer_index=*/2); + // We can't "see" the other arrays in the union, but we put the "active" + // union array in the right place and fill zero-length arrays for the + // others + const std::vector& child_ids = + static_cast(this->type)->child_ids(); + DCHECK_GE(scalar.type_code, 0); + DCHECK_LT(scalar.type_code, static_cast(child_ids.size())); + for (int i = 0; i < static_cast(this->child_data.size()); ++i) { + if (i == child_ids[scalar.type_code]) { + this->child_data[i].FillFromScalar(*scalar.value); + } else { + internal::FillZeroLengthArray(this->type->field(i)->type().get(), + &this->child_data[i]); + } + } + } else { + const auto& scalar = checked_cast(value); + // Sparse union scalars have a full complement of child values even + // though only one of them is relevant, so we just fill them in here + for (int i = 0; i < static_cast(this->child_data.size()); ++i) { + this->child_data[i].FillFromScalar(*scalar.value[i]); + } + } + } else if (type_id == Type::EXTENSION) { + // Pass through storage + const auto& scalar = checked_cast(value); + FillFromScalar(*scalar.value); + + // Restore the extension type + this->type = value.type.get(); } else { - // TODO(wesm): implement for other types - DCHECK(false) << "need to implement for other types"; + DCHECK_EQ(Type::NA, type_id) << "should be unreachable: " << *value.type; } } @@ -212,40 +383,14 @@ int64_t ArraySpan::GetNullCount() const { return precomputed; } -int GetNumBuffers(const DataType& type) { - switch (type.id()) { - case Type::NA: - case Type::STRUCT: - case Type::FIXED_SIZE_LIST: - return 1; - case Type::BINARY: - case Type::LARGE_BINARY: - case Type::STRING: - case Type::LARGE_STRING: - case Type::DENSE_UNION: - return 3; - case Type::EXTENSION: - // The number of buffers depends on the storage type - return GetNumBuffers( - *internal::checked_cast(type).storage_type()); - default: - // Everything else has 2 buffers - return 2; - } -} - int ArraySpan::num_buffers() const { return GetNumBuffers(*this->type); } std::shared_ptr ArraySpan::ToArrayData() const { - auto result = std::make_shared(this->type->Copy(), this->length, + 
auto result = std::make_shared(this->type->GetSharedPtr(), this->length, this->null_count, this->offset); for (int i = 0; i < this->num_buffers(); ++i) { - if (this->buffers[i].owner) { - result->buffers.emplace_back(this->GetBuffer(i)); - } else { - result->buffers.push_back(nullptr); - } + result->buffers.emplace_back(this->GetBuffer(i)); } if (this->type->id() == Type::NA) { diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index df547aedfaf..b76ab597107 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -266,6 +266,11 @@ struct ARROW_EXPORT ArraySpan { int64_t offset = 0; BufferSpan buffers[3]; + // 16 bytes of scratch space to enable this ArraySpan to be a view onto + // scalar values including binary scalars (where we need to create a buffer + // that looks like two 32-bit or 64-bit offsets) + uint8_t scratch_space[16]; + ArraySpan() = default; explicit ArraySpan(const DataType* type, int64_t length) : type(type), length(length) {} @@ -273,9 +278,7 @@ struct ARROW_EXPORT ArraySpan { ArraySpan(const ArrayData& data) { // NOLINT implicit conversion SetMembers(data); } - ArraySpan(const Scalar& data) { // NOLINT implicit converstion - FillFromScalar(data); - } + explicit ArraySpan(const Scalar& data) { FillFromScalar(data); } /// If dictionary-encoded, put dictionary in the first entry std::vector child_data; @@ -343,10 +346,14 @@ struct ARROW_EXPORT ArraySpan { std::shared_ptr ToArray() const; std::shared_ptr GetBuffer(int index) const { - if (this->buffers[index].owner == NULLPTR) { - return NULLPTR; + const BufferSpan& buf = this->buffers[index]; + if (buf.owner) { + return *buf.owner; + } else if (buf.data != NULLPTR) { + // Buffer points to some memory without an owning buffer + return std::make_shared(buf.data, buf.size); } else { - return *this->buffers[index].owner; + return NULLPTR; } } @@ -372,6 +379,8 @@ struct ARROW_EXPORT ArraySpan { namespace internal { +void FillZeroLengthArray(const DataType* type, ArraySpan* span); + /// Construct a zero-copy view of this ArrayData with the given type. /// /// This method checks if the types are layout-compatible. diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index e5b4ab39493..c0cdcab730c 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -664,22 +664,20 @@ class RepeatedArrayFactory { } Status Visit(const SparseUnionType& type) { - const auto& union_scalar = checked_cast(scalar_); - const auto& union_type = checked_cast(*scalar_.type); + const auto& union_scalar = checked_cast(scalar_); const auto scalar_type_code = union_scalar.type_code; - const auto scalar_child_id = union_type.child_ids()[scalar_type_code]; // Create child arrays: most of them are all-null, except for the child array // for the given type code (if the scalar is valid). 
ArrayVector fields; for (int i = 0; i < type.num_fields(); ++i) { fields.emplace_back(); - if (i == scalar_child_id && scalar_.is_valid) { - ARROW_ASSIGN_OR_RAISE(fields.back(), - MakeArrayFromScalar(*union_scalar.value, length_, pool_)); - } else { + if (i == union_scalar.child_id && scalar_.is_valid) { ARROW_ASSIGN_OR_RAISE( - fields.back(), MakeArrayOfNull(union_type.field(i)->type(), length_, pool_)); + fields.back(), MakeArrayFromScalar(*union_scalar.value[i], length_, pool_)); + } else { + ARROW_ASSIGN_OR_RAISE(fields.back(), + MakeArrayOfNull(type.field(i)->type(), length_, pool_)); } } @@ -691,7 +689,7 @@ class RepeatedArrayFactory { } Status Visit(const DenseUnionType& type) { - const auto& union_scalar = checked_cast(scalar_); + const auto& union_scalar = checked_cast(scalar_); const auto& union_type = checked_cast(*scalar_.type); const auto scalar_type_code = union_scalar.type_code; const auto scalar_child_id = union_type.child_ids()[scalar_type_code]; diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 8af319ed9ea..c5406ee583f 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -796,12 +796,19 @@ class ScalarEqualsVisitor { return Status::OK(); } - Status Visit(const UnionScalar& left) { - const auto& right = checked_cast(right_); + Status Visit(const DenseUnionScalar& left) { + const auto& right = checked_cast(right_); result_ = ScalarEquals(*left.value, *right.value, options_, floating_approximate_); return Status::OK(); } + Status Visit(const SparseUnionScalar& left) { + const auto& right = checked_cast(right_); + result_ = ScalarEquals(*left.value[left.child_id], *right.value[right.child_id], + options_, floating_approximate_); + return Status::OK(); + } + Status Visit(const DictionaryScalar& left) { const auto& right = checked_cast(right_); result_ = ScalarEquals(*left.value.index, *right.value.index, options_, diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 4ebdecf5e78..ff1d6619905 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -347,11 +347,11 @@ Result Filter(const Datum& values, const Datum& filter, return CallFunction("filter", {values, filter}, &options, ctx); } -Result Take(const Datum& values, const Datum& filter, const TakeOptions& options, +Result Take(const Datum& values, const Datum& indices, const TakeOptions& options, ExecContext* ctx) { // Invoke metafunction which deals with Datum kinds other than just Array, // ChunkedArray. - return CallFunction("take", {values, filter}, &options, ctx); + return CallFunction("take", {values, indices}, &options, ctx); } Result> Take(const Array& values, const Array& indices, diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc index bd49041b4f3..21257e05602 100644 --- a/cpp/src/arrow/compute/cast.cc +++ b/cpp/src/arrow/compute/cast.cc @@ -69,9 +69,9 @@ void EnsureInitCastTable() { std::call_once(cast_table_initialized, InitCastTabl // Private version of GetCastFunction with better error reporting // if the input type is known. 
Result> GetCastFunctionInternal( - const std::shared_ptr& to_type, const DataType* from_type = nullptr) { + const TypeHolder& to_type, const DataType* from_type = nullptr) { internal::EnsureInitCastTable(); - auto it = internal::g_cast_table.find(static_cast(to_type->id())); + auto it = internal::g_cast_table.find(static_cast(to_type.id())); if (it == internal::g_cast_table.end()) { if (from_type != nullptr) { return Status::NotImplemented("Unsupported cast from ", *from_type, " to ", @@ -139,18 +139,6 @@ void RegisterScalarCast(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::make_shared())); DCHECK_OK(registry->AddFunctionOptionsType(kCastOptionsType)); } -} // namespace internal - -CastOptions::CastOptions(bool safe) - : FunctionOptions(internal::kCastOptionsType), - allow_int_overflow(!safe), - allow_time_truncate(!safe), - allow_time_overflow(!safe), - allow_decimal_truncate(!safe), - allow_float_truncate(!safe), - allow_invalid_utf8(!safe) {} - -constexpr char CastOptions::kTypeName[]; CastFunction::CastFunction(std::string name, Type::type out_type_id) : ScalarFunction(std::move(name), Arity::Unary(), FunctionDoc::Empty()), @@ -177,18 +165,18 @@ Status CastFunction::AddKernel(Type::type in_type_id, std::vector in_ } Result CastFunction::DispatchExact( - const std::vector& values) const { - RETURN_NOT_OK(CheckArity(values)); + const std::vector& types) const { + RETURN_NOT_OK(CheckArity(types.size())); std::vector candidate_kernels; for (const auto& kernel : kernels_) { - if (kernel.signature->MatchesInputs(values)) { + if (kernel.signature->MatchesInputs(types)) { candidate_kernels.push_back(&kernel); } } if (candidate_kernels.size() == 0) { - return Status::NotImplemented("Unsupported cast from ", values[0].type->ToString(), + return Status::NotImplemented("Unsupported cast from ", types[0].type->ToString(), " to ", ToTypeName(out_type_id_), " using function ", this->name()); } @@ -213,28 +201,40 @@ Result CastFunction::DispatchExact( return candidate_kernels[0]; } +Result> GetCastFunction(const TypeHolder& to_type) { + return internal::GetCastFunctionInternal(to_type); +} + +} // namespace internal + +CastOptions::CastOptions(bool safe) + : FunctionOptions(internal::kCastOptionsType), + allow_int_overflow(!safe), + allow_time_truncate(!safe), + allow_time_overflow(!safe), + allow_decimal_truncate(!safe), + allow_float_truncate(!safe), + allow_invalid_utf8(!safe) {} + +constexpr char CastOptions::kTypeName[]; + Result Cast(const Datum& value, const CastOptions& options, ExecContext* ctx) { return CallFunction("cast", {value}, &options, ctx); } -Result Cast(const Datum& value, std::shared_ptr to_type, +Result Cast(const Datum& value, const TypeHolder& to_type, const CastOptions& options, ExecContext* ctx) { CastOptions options_with_to_type = options; options_with_to_type.to_type = to_type; return Cast(value, options_with_to_type, ctx); } -Result> Cast(const Array& value, std::shared_ptr to_type, +Result> Cast(const Array& value, const TypeHolder& to_type, const CastOptions& options, ExecContext* ctx) { ARROW_ASSIGN_OR_RAISE(Datum result, Cast(Datum(value), to_type, options, ctx)); return result.make_array(); } -Result> GetCastFunction( - const std::shared_ptr& to_type) { - return internal::GetCastFunctionInternal(to_type); -} - bool CanCast(const DataType& from_type, const DataType& to_type) { internal::EnsureInitCastTable(); auto it = internal::g_cast_table.find(static_cast(to_type.id())); @@ -242,7 +242,7 @@ bool CanCast(const DataType& from_type, const DataType& 
to_type) { return false; } - const CastFunction* function = it->second.get(); + const internal::CastFunction* function = it->second.get(); DCHECK_EQ(function->out_type_id(), to_type.id()); for (auto from_id : function->in_type_ids()) { @@ -253,21 +253,5 @@ bool CanCast(const DataType& from_type, const DataType& to_type) { return false; } -Result> Cast(std::vector datums, std::vector descrs, - ExecContext* ctx) { - for (size_t i = 0; i != datums.size(); ++i) { - if (descrs[i] != datums[i].descr()) { - if (descrs[i].shape != datums[i].shape()) { - return Status::NotImplemented("casting between Datum shapes"); - } - - ARROW_ASSIGN_OR_RAISE(datums[i], - Cast(datums[i], CastOptions::Safe(descrs[i].type), ctx)); - } - } - - return datums; -} - } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index e9c3cf55da9..7432933a124 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -22,8 +22,7 @@ #include #include "arrow/compute/function.h" -#include "arrow/compute/kernel.h" -#include "arrow/datum.h" +#include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type.h" @@ -46,13 +45,13 @@ class ARROW_EXPORT CastOptions : public FunctionOptions { explicit CastOptions(bool safe = true); static constexpr char const kTypeName[] = "CastOptions"; - static CastOptions Safe(std::shared_ptr to_type = NULLPTR) { + static CastOptions Safe(TypeHolder to_type = {}) { CastOptions safe(true); safe.to_type = std::move(to_type); return safe; } - static CastOptions Unsafe(std::shared_ptr to_type = NULLPTR) { + static CastOptions Unsafe(TypeHolder to_type = {}) { CastOptions unsafe(false); unsafe.to_type = std::move(to_type); return unsafe; @@ -60,7 +59,7 @@ class ARROW_EXPORT CastOptions : public FunctionOptions { // Type being casted to. 
May be passed separate to eager function // compute::Cast - std::shared_ptr to_type; + TypeHolder to_type; bool allow_int_overflow; bool allow_time_truncate; @@ -74,36 +73,6 @@ class ARROW_EXPORT CastOptions : public FunctionOptions { /// @} -// Cast functions are _not_ registered in the FunctionRegistry, though they use -// the same execution machinery -class CastFunction : public ScalarFunction { - public: - CastFunction(std::string name, Type::type out_type_id); - - Type::type out_type_id() const { return out_type_id_; } - const std::vector& in_type_ids() const { return in_type_ids_; } - - Status AddKernel(Type::type in_type_id, std::vector in_types, - OutputType out_type, ArrayKernelExec exec, - NullHandling::type = NullHandling::INTERSECTION, - MemAllocation::type = MemAllocation::PREALLOCATE); - - // Note, this function toggles off memory allocation and sets the init - // function to CastInit - Status AddKernel(Type::type in_type_id, ScalarKernel kernel); - - Result DispatchExact( - const std::vector& values) const override; - - private: - std::vector in_type_ids_; - const Type::type out_type_id_; -}; - -ARROW_EXPORT -Result> GetCastFunction( - const std::shared_ptr& to_type); - /// \brief Return true if a cast function is defined ARROW_EXPORT bool CanCast(const DataType& from_type, const DataType& to_type); @@ -121,7 +90,7 @@ bool CanCast(const DataType& from_type, const DataType& to_type); /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result> Cast(const Array& value, std::shared_ptr to_type, +Result> Cast(const Array& value, const TypeHolder& to_type, const CastOptions& options = CastOptions::Safe(), ExecContext* ctx = NULLPTR); @@ -147,21 +116,9 @@ Result Cast(const Datum& value, const CastOptions& options, /// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Cast(const Datum& value, std::shared_ptr to_type, +Result Cast(const Datum& value, const TypeHolder& to_type, const CastOptions& options = CastOptions::Safe(), ExecContext* ctx = NULLPTR); -/// \brief Cast several values simultaneously. Safe cast options are used. 
-/// \param[in] values datums to cast -/// \param[in] descrs ValueDescrs to cast to -/// \param[in] ctx the function execution context, optional -/// \return the resulting datums -/// -/// \since 4.0.0 -/// \note API not yet finalized -ARROW_EXPORT -Result> Cast(std::vector values, std::vector descrs, - ExecContext* ctx = NULLPTR); - } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/cast_internal.h b/cpp/src/arrow/compute/cast_internal.h index 0105d08a573..bfa2a110cd7 100644 --- a/cpp/src/arrow/compute/cast_internal.h +++ b/cpp/src/arrow/compute/cast_internal.h @@ -30,6 +30,32 @@ namespace internal { using CastState = OptionsWrapper; +// Cast functions are _not_ registered in the FunctionRegistry, though they use +// the same execution machinery +class CastFunction : public ScalarFunction { + public: + CastFunction(std::string name, Type::type out_type_id); + + Type::type out_type_id() const { return out_type_id_; } + const std::vector& in_type_ids() const { return in_type_ids_; } + + Status AddKernel(Type::type in_type_id, std::vector in_types, + OutputType out_type, ArrayKernelExec exec, + NullHandling::type = NullHandling::INTERSECTION, + MemAllocation::type = MemAllocation::PREALLOCATE); + + // Note, this function toggles off memory allocation and sets the init + // function to CastInit + Status AddKernel(Type::type in_type_id, ScalarKernel kernel); + + Result DispatchExact( + const std::vector& types) const override; + + private: + std::vector in_type_ids_; + const Type::type out_type_id_; +}; + // See kernels/scalar_cast_*.cc for these std::vector> GetBooleanCasts(); std::vector> GetNumericCasts(); @@ -38,6 +64,9 @@ std::vector> GetBinaryLikeCasts(); std::vector> GetNestedCasts(); std::vector> GetDictionaryCasts(); +ARROW_EXPORT +Result> GetCastFunction(const TypeHolder& to_type); + } // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index a612a83e7a8..c5b1dfaca0e 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -219,16 +219,6 @@ void ComputeDataPreallocate(const DataType& type, namespace detail { -Status CheckAllValues(const std::vector& values) { - for (const auto& value : values) { - if (!value.is_value()) { - return Status::Invalid("Tried executing function with non-value type: ", - value.ToString()); - } - } - return Status::OK(); -} - ExecBatchIterator::ExecBatchIterator(std::vector args, int64_t length, int64_t max_chunksize) : args_(std::move(args)), @@ -249,9 +239,7 @@ Result> ExecBatchIterator::Make( } } - // If the arguments are all scalars, then the length is 1 - int64_t length = 1; - + int64_t length = -1; bool length_set = false; for (auto& arg : args) { if (arg.is_scalar()) { @@ -267,6 +255,11 @@ Result> ExecBatchIterator::Make( } } + if (!length_set) { + // All scalar case, to be removed soon + length = 1; + } + max_chunksize = std::min(length, max_chunksize); return std::unique_ptr( @@ -328,8 +321,17 @@ bool ExecBatchIterator::Next(ExecBatch* batch) { // ---------------------------------------------------------------------- // ExecSpanIterator; to eventually replace ExecBatchIterator -Status ExecSpanIterator::Init(const ExecBatch& batch, ValueDescr::Shape output_shape, - int64_t max_chunksize) { +bool CheckIfAllScalar(const ExecBatch& batch) { + for (const Datum& value : batch.values) { + if (!value.is_scalar()) { + DCHECK(value.is_arraylike()); + return false; + } + } + return batch.num_values() > 0; +} + +Status 
ExecSpanIterator::Init(const ExecBatch& batch, int64_t max_chunksize) { if (batch.num_values() > 0) { // Validate arguments bool all_args_same_length = false; @@ -343,8 +345,9 @@ Status ExecSpanIterator::Init(const ExecBatch& batch, ValueDescr::Shape output_s } args_ = &batch.values; initialized_ = have_chunked_arrays_ = false; + have_all_scalars_ = CheckIfAllScalar(batch); position_ = 0; - length_ = output_shape == ValueDescr::SCALAR ? 1 : batch.length; + length_ = batch.length; chunk_indexes_.clear(); chunk_indexes_.resize(args_->size(), 0); value_positions_.clear(); @@ -358,8 +361,7 @@ Status ExecSpanIterator::Init(const ExecBatch& batch, ValueDescr::Shape output_s int64_t ExecSpanIterator::GetNextChunkSpan(int64_t iteration_size, ExecSpan* span) { for (size_t i = 0; i < args_->size() && iteration_size > 0; ++i) { // If the argument is not a chunked array, it's either a Scalar or Array, - // in which case it doesn't influence the size of this span. Note that if - // the args are all scalars the span length is 1 + // in which case it doesn't influence the size of this span if (!args_->at(i).is_chunked_array()) { continue; } @@ -385,13 +387,20 @@ int64_t ExecSpanIterator::GetNextChunkSpan(int64_t iteration_size, ExecSpan* spa return iteration_size; } -bool ExecSpanIterator::Next(ExecSpan* span) { - if (position_ == length_) { - // This also protects from degenerate cases like ChunkedArrays - // without any chunks - return false; +void PromoteExecSpanScalars(ExecSpan* span) { + // In the "all scalar" case, we "promote" the scalars to ArraySpans of + // length 1, since the kernel implementations do not handle the all + // scalar case + for (int i = 0; i < span->num_values(); ++i) { + ExecValue* value = &span->values[i]; + if (value->is_scalar()) { + value->array.FillFromScalar(*value->scalar); + value->scalar = nullptr; + } } +} +bool ExecSpanIterator::Next(ExecSpan* span) { if (!initialized_) { span->length = 0; @@ -402,25 +411,36 @@ bool ExecSpanIterator::Next(ExecSpan* span) { // iteration span->values.resize(args_->size()); for (size_t i = 0; i < args_->size(); ++i) { - if (args_->at(i).is_scalar()) { - span->values[i].SetScalar(args_->at(i).scalar().get()); - } else if (args_->at(i).is_array()) { - const ArrayData& arr = *args_->at(i).array(); + const Datum& arg = (*args_)[i]; + if (arg.is_scalar()) { + span->values[i].SetScalar(arg.scalar().get()); + } else if (arg.is_array()) { + const ArrayData& arr = *arg.array(); span->values[i].SetArray(arr); value_offsets_[i] = arr.offset; } else { // Populate members from the first chunk - const Array* first_chunk = args_->at(i).chunked_array()->chunk(0).get(); - const ArrayData& arr = *first_chunk->data(); - span->values[i].SetArray(arr); - value_offsets_[i] = arr.offset; + const ChunkedArray& carr = *arg.chunked_array(); + if (carr.num_chunks() > 0) { + const ArrayData& arr = *carr.chunk(0)->data(); + span->values[i].SetArray(arr); + value_offsets_[i] = arr.offset; + } else { + // Fill as zero-length array + internal::FillZeroLengthArray(carr.type().get(), &span->values[i].array); + span->values[i].scalar = nullptr; + } have_chunked_arrays_ = true; } } - initialized_ = true; - } - if (position_ == length_) { + if (have_all_scalars_) { + PromoteExecSpanScalars(span); + } + + initialized_ = true; + } else if (position_ == length_) { + // We've emitted at least one span and we're at the end so we are done return false; } @@ -441,6 +461,7 @@ bool ExecSpanIterator::Next(ExecSpan* span) { value_positions_[i] += iteration_size; } } + position_ += 
iteration_size; DCHECK_LE(position_, length_); return true; @@ -662,7 +683,7 @@ class NullPropagator { }; std::shared_ptr ToChunkedArray(const std::vector& values, - const std::shared_ptr& type) { + const TypeHolder& type) { std::vector> arrays; arrays.reserve(values.size()); for (const Datum& val : values) { @@ -672,7 +693,7 @@ std::shared_ptr ToChunkedArray(const std::vector& values, } arrays.emplace_back(val.make_array()); } - return std::make_shared(std::move(arrays), type); + return std::make_shared(std::move(arrays), type.GetSharedPtr()); } bool HaveChunkedArray(const std::vector& values) { @@ -691,9 +712,9 @@ class KernelExecutorImpl : public KernelExecutor { kernel_ctx_ = kernel_ctx; kernel_ = static_cast(args.kernel); - // Resolve the output descriptor for this kernel + // Resolve the output type for this kernel ARROW_ASSIGN_OR_RAISE( - output_descr_, kernel_->signature->out_type().Resolve(kernel_ctx_, args.inputs)); + output_type_, kernel_->signature->out_type().Resolve(kernel_ctx_, args.inputs)); return Status::OK(); } @@ -703,7 +724,7 @@ class KernelExecutorImpl : public KernelExecutor { // Kernel::mem_allocation is not MemAllocation::PREALLOCATE, then no // data buffers will be set Result> PrepareOutput(int64_t length) { - auto out = std::make_shared(output_descr_.type, length); + auto out = std::make_shared(output_type_.GetSharedPtr(), length); out->buffers.resize(output_num_buffers_); if (validity_preallocated_) { @@ -726,10 +747,10 @@ class KernelExecutorImpl : public KernelExecutor { Status CheckResultType(const Datum& out, const char* function_name) override { const auto& type = out.type(); - if (type != nullptr && !type->Equals(output_descr_.type)) { + if (type != nullptr && !type->Equals(*output_type_.type)) { return Status::TypeError( "kernel type result mismatch for function '", function_name, "': declared as ", - output_descr_.type->ToString(), ", actual is ", type->ToString()); + output_type_.type->ToString(), ", actual is ", type->ToString()); } return Status::OK(); } @@ -741,7 +762,7 @@ class KernelExecutorImpl : public KernelExecutor { KernelContext* kernel_ctx_; const KernelType* kernel_; - ValueDescr output_descr_; + TypeHolder output_type_; int output_num_buffers_; @@ -757,18 +778,23 @@ class KernelExecutorImpl : public KernelExecutor { class ScalarExecutor : public KernelExecutorImpl { public: Status Execute(const ExecBatch& batch, ExecListener* listener) override { - RETURN_NOT_OK(span_iterator_.Init(batch, output_descr_.shape, - exec_context()->exec_chunksize())); + RETURN_NOT_OK(span_iterator_.Init(batch, exec_context()->exec_chunksize())); - // TODO(wesm): remove if with ARROW-16757 - if (output_descr_.shape != ValueDescr::SCALAR) { - // If the executor is configured to produce a single large Array output for - // kernels supporting preallocation, then we do so up front and then - // iterate over slices of that large array. 
Otherwise, we preallocate prior - // to processing each span emitted from the ExecSpanIterator - RETURN_NOT_OK(SetupPreallocation(span_iterator_.length(), batch.values)); + if (batch.length == 0) { + // For zero-length batches, we do nothing except return a zero-length + // array of the correct output type + ARROW_ASSIGN_OR_RAISE(std::shared_ptr result, + MakeArrayOfNull(output_type_.GetSharedPtr(), /*length=*/0, + exec_context()->memory_pool())); + return EmitResult(result->data(), listener); } + // If the executor is configured to produce a single large Array output for + // kernels supporting preallocation, then we do so up front and then + // iterate over slices of that large array. Otherwise, we preallocate prior + // to processing each span emitted from the ExecSpanIterator + RETURN_NOT_OK(SetupPreallocation(span_iterator_.length(), batch.values)); + // ARROW-16756: Here we have to accommodate the distinct cases // // * Fully-preallocated contiguous output @@ -784,30 +810,28 @@ class ScalarExecutor : public KernelExecutorImpl { Datum WrapResults(const std::vector& inputs, const std::vector& outputs) override { - if (output_descr_.shape == ValueDescr::SCALAR) { - // TODO(wesm): to remove, see ARROW-16757 - DCHECK_EQ(outputs.size(), 1); - // Return as SCALAR - return outputs[0]; + // If execution yielded multiple chunks (because large arrays were split + // based on the ExecContext parameters, then the result is a ChunkedArray + if (HaveChunkedArray(inputs) || outputs.size() > 1) { + return ToChunkedArray(outputs, output_type_); } else { - // If execution yielded multiple chunks (because large arrays were split - // based on the ExecContext parameters, then the result is a ChunkedArray - if (HaveChunkedArray(inputs) || outputs.size() > 1) { - return ToChunkedArray(outputs, output_descr_.type); - } else if (outputs.size() == 1) { - // Outputs have just one element - return outputs[0]; - } else { - // XXX: In the case where no outputs are omitted, is returning a 0-length - // array always the correct move? - return MakeArrayOfNull(output_descr_.type, /*length=*/0, - exec_context()->memory_pool()) - .ValueOrDie(); - } + // Outputs have just one element + return outputs[0]; } } protected: + Status EmitResult(std::shared_ptr out, ExecListener* listener) { + if (span_iterator_.have_all_scalars()) { + // ARROW-16757 We boxed scalar inputs as ArraySpan, so now we have to + // unbox the output as a scalar + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, MakeArray(out)->GetScalar(0)); + return listener->OnResult(std::move(scalar)); + } else { + return listener->OnResult(std::move(out)); + } + } + Status ExecuteSpans(ExecListener* listener) { // We put the preallocation in an ArraySpan to be passed to the // kernel which is expecting to receive that. 
More @@ -817,6 +841,7 @@ class ScalarExecutor : public KernelExecutorImpl { ExecSpan input; ExecResult output; ArraySpan* output_span = output.array_span(); + if (preallocate_contiguous_) { // Make one big output allocation ARROW_ASSIGN_OR_RAISE(preallocation, PrepareOutput(span_iterator_.length())); @@ -832,7 +857,7 @@ class ScalarExecutor : public KernelExecutorImpl { } // Kernel execution is complete; emit result - RETURN_NOT_OK(listener->OnResult(std::move(preallocation))); + return EmitResult(std::move(preallocation), listener); } else { // Fully preallocating, but not contiguously // We preallocate (maybe) only for the output of processing the current @@ -842,15 +867,15 @@ class ScalarExecutor : public KernelExecutorImpl { output_span->SetMembers(*preallocation); RETURN_NOT_OK(ExecuteSingleSpan(input, &output)); // Emit the result for this chunk - RETURN_NOT_OK(listener->OnResult(std::move(preallocation))); + RETURN_NOT_OK(EmitResult(std::move(preallocation), listener)); } + return Status::OK(); } - return Status::OK(); } Status ExecuteSingleSpan(const ExecSpan& input, ExecResult* out) { ArraySpan* result_span = out->array_span(); - if (output_descr_.type->id() == Type::NA) { + if (output_type_.type->id() == Type::NA) { result_span->null_count = result_span->length; } else if (kernel_->null_handling == NullHandling::INTERSECTION) { if (!elide_validity_bitmap_) { @@ -859,7 +884,10 @@ class ScalarExecutor : public KernelExecutorImpl { } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { result_span->null_count = 0; } - return kernel_->exec(kernel_ctx_, input, out); + RETURN_NOT_OK(kernel_->exec(kernel_ctx_, input, out)); + // Output type didn't change + DCHECK(out->is_array_span()); + return Status::OK(); } Status ExecuteNonSpans(ExecListener* listener) { @@ -873,60 +901,32 @@ class ScalarExecutor : public KernelExecutorImpl { ExecSpan input; ExecResult output; while (span_iterator_.Next(&input)) { - if (output_descr_.shape == ValueDescr::ARRAY) { - ARROW_ASSIGN_OR_RAISE(output.value, PrepareOutput(input.length)); - DCHECK(output.is_array_data()); - } else { - // For scalar outputs, we set a null scalar of the correct type to - // communicate the output type to the kernel if needed - // - // XXX: Is there some way to avoid this step? 
- // TODO: Remove this path in ARROW-16757 - output.value = MakeNullScalar(output_descr_.type); - } + ARROW_ASSIGN_OR_RAISE(output.value, PrepareOutput(input.length)); + DCHECK(output.is_array_data()); - if (output_descr_.shape == ValueDescr::ARRAY) { - ArrayData* out_arr = output.array_data().get(); - if (output_descr_.type->id() == Type::NA) { - out_arr->null_count = out_arr->length; - } else if (kernel_->null_handling == NullHandling::INTERSECTION) { - RETURN_NOT_OK(PropagateNulls(kernel_ctx_, input, out_arr)); - } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { - out_arr->null_count = 0; - } - } else { - // TODO(wesm): to remove, see ARROW-16757 - if (kernel_->null_handling == NullHandling::INTERSECTION) { - // set scalar validity - output.scalar()->is_valid = - std::all_of(input.values.begin(), input.values.end(), - [](const ExecValue& input) { return input.scalar->is_valid; }); - } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { - output.scalar()->is_valid = true; - } + ArrayData* out_arr = output.array_data().get(); + if (output_type_.type->id() == Type::NA) { + out_arr->null_count = out_arr->length; + } else if (kernel_->null_handling == NullHandling::INTERSECTION) { + RETURN_NOT_OK(PropagateNulls(kernel_ctx_, input, out_arr)); + } else if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { + out_arr->null_count = 0; } RETURN_NOT_OK(kernel_->exec(kernel_ctx_, input, &output)); - // Assert that the kernel did not alter the shape of the output - // type. After ARROW-16577 delete this since ValueDescr::SCALAR will not - // exist anymore - DCHECK(((output_descr_.shape == ValueDescr::ARRAY) && output.is_array_data()) || - ((output_descr_.shape == ValueDescr::SCALAR) && output.is_scalar())); + // Output type didn't change + DCHECK(output.is_array_data()); // Emit a result for each chunk - if (output_descr_.shape == ValueDescr::ARRAY) { - RETURN_NOT_OK(listener->OnResult(output.array_data())); - } else { - RETURN_NOT_OK(listener->OnResult(output.scalar())); - } + RETURN_NOT_OK(EmitResult(std::move(output.array_data()), listener)); } return Status::OK(); } Status SetupPreallocation(int64_t total_length, const std::vector& args) { - output_num_buffers_ = static_cast(output_descr_.type->layout().buffers.size()); - auto out_type_id = output_descr_.type->id(); + output_num_buffers_ = static_cast(output_type_.type->layout().buffers.size()); + auto out_type_id = output_type_.type->id(); // Default to no validity pre-allocation for following cases: // - Output Array is NullArray // - kernel_->null_handling is COMPUTED_NO_PREALLOCATE or OUTPUT_NOT_NULL @@ -950,7 +950,7 @@ class ScalarExecutor : public KernelExecutorImpl { } } if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) { - ComputeDataPreallocate(*output_descr_.type, &data_preallocated_); + ComputeDataPreallocate(*output_type_.type, &data_preallocated_); } // Validity bitmap either preallocated or elided, and all data @@ -995,14 +995,24 @@ class ScalarExecutor : public KernelExecutorImpl { ExecSpanIterator span_iterator_; }; +Status CheckCanExecuteChunked(const VectorKernel* kernel) { + if (kernel->exec_chunked == nullptr) { + return Status::Invalid( + "Vector kernel cannot execute chunkwise and no " + "chunked exec function was defined"); + } + + if (kernel->null_handling == NullHandling::INTERSECTION) { + return Status::Invalid( + "Null pre-propagation is unsupported for ChunkedArray " + "execution in vector kernels"); + } + return Status::OK(); +} + class VectorExecutor : public 
KernelExecutorImpl { public: Status Execute(const ExecBatch& batch, ExecListener* listener) override { - // TODO(wesm): remove in ARROW-16577 - if (output_descr_.shape == ValueDescr::SCALAR) { - return Status::Invalid("VectorExecutor only supports array output types"); - } - // Some vector kernels have a separate code path for handling // chunked arrays (VectorKernel::exec_chunked) so we check if we // have any chunked arrays. If we do and an exec_chunked function @@ -1012,19 +1022,18 @@ class VectorExecutor : public KernelExecutorImpl { if (arg.is_chunked_array()) have_chunked_arrays = true; } - output_num_buffers_ = static_cast(output_descr_.type->layout().buffers.size()); + output_num_buffers_ = static_cast(output_type_.type->layout().buffers.size()); // Decide if we need to preallocate memory for this kernel validity_preallocated_ = (kernel_->null_handling != NullHandling::COMPUTED_NO_PREALLOCATE && kernel_->null_handling != NullHandling::OUTPUT_NOT_NULL); if (kernel_->mem_allocation == MemAllocation::PREALLOCATE) { - ComputeDataPreallocate(*output_descr_.type, &data_preallocated_); + ComputeDataPreallocate(*output_type_.type, &data_preallocated_); } if (kernel_->can_execute_chunkwise) { - RETURN_NOT_OK(span_iterator_.Init(batch, output_descr_.shape, - exec_context()->exec_chunksize())); + RETURN_NOT_OK(span_iterator_.Init(batch, exec_context()->exec_chunksize())); ExecSpan span; while (span_iterator_.Next(&span)) { RETURN_NOT_OK(Exec(span, listener)); @@ -1038,7 +1047,11 @@ class VectorExecutor : public KernelExecutorImpl { } else { // No chunked arrays. We pack the args into an ExecSpan and // call the regular exec code path - RETURN_NOT_OK(Exec(ExecSpan(batch), listener)); + ExecSpan span(batch); + if (CheckIfAllScalar(batch)) { + PromoteExecSpanScalars(&span); + } + RETURN_NOT_OK(Exec(span, listener)); } } @@ -1058,63 +1071,46 @@ class VectorExecutor : public KernelExecutorImpl { // If execution yielded multiple chunks (because large arrays were split // based on the ExecContext parameters, then the result is a ChunkedArray if (kernel_->output_chunked && (HaveChunkedArray(inputs) || outputs.size() > 1)) { - return ToChunkedArray(outputs, output_descr_.type); - } else if (outputs.size() == 1) { + return ToChunkedArray(outputs, output_type_.GetSharedPtr()); + } else { // Outputs have just one element return outputs[0]; - } else { - // XXX: In the case where no outputs are omitted, is returning a 0-length - // array always the correct move? - return MakeArrayOfNull(output_descr_.type, /*length=*/0).ValueOrDie(); } } protected: - Status Exec(const ExecSpan& span, ExecListener* listener) { - ExecResult out; - - // We preallocate (maybe) only for the output of processing the current - // batch, but create an output ArrayData instance regardless - ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(span.length)); - - if (kernel_->null_handling == NullHandling::INTERSECTION) { - RETURN_NOT_OK(PropagateNulls(kernel_ctx_, span, out.array_data().get())); - } - RETURN_NOT_OK(kernel_->exec(kernel_ctx_, span, &out)); + Status EmitResult(Datum result, ExecListener* listener) { if (!kernel_->finalize) { // If there is no result finalizer (e.g. 
for hash-based functions, we can // emit the processed batch right away rather than waiting - RETURN_NOT_OK(listener->OnResult(out.array_data())); + RETURN_NOT_OK(listener->OnResult(std::move(result))); } else { - results_.emplace_back(out.array_data()); + results_.emplace_back(std::move(result)); } return Status::OK(); } - Status ExecChunked(const ExecBatch& batch, ExecListener* listener) { - if (kernel_->exec_chunked == nullptr) { - return Status::Invalid( - "Vector kernel cannot execute chunkwise and no " - "chunked exec function was defined"); - } - + Status Exec(const ExecSpan& span, ExecListener* listener) { + ExecResult out; + ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(span.length)); if (kernel_->null_handling == NullHandling::INTERSECTION) { - return Status::Invalid( - "Null pre-propagation is unsupported for ChunkedArray " - "execution in vector kernels"); + RETURN_NOT_OK(PropagateNulls(kernel_ctx_, span, out.array_data().get())); } + RETURN_NOT_OK(kernel_->exec(kernel_ctx_, span, &out)); + return EmitResult(std::move(out.array_data()), listener); + } + Status ExecChunked(const ExecBatch& batch, ExecListener* listener) { + RETURN_NOT_OK(CheckCanExecuteChunked(kernel_)); Datum out; ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(batch.length)); RETURN_NOT_OK(kernel_->exec_chunked(kernel_ctx_, batch, &out)); - if (!kernel_->finalize) { - // If there is no result finalizer (e.g. for hash-based functions, we can - // emit the processed batch right away rather than waiting - RETURN_NOT_OK(listener->OnResult(std::move(out))); + if (out.is_array()) { + return EmitResult(std::move(out.array()), listener); } else { - results_.emplace_back(std::move(out)); + DCHECK(out.is_chunked_array()); + return EmitResult(std::move(out.chunked_array()), listener); } - return Status::OK(); } ExecSpanIterator span_iterator_; @@ -1124,7 +1120,7 @@ class VectorExecutor : public KernelExecutorImpl { class ScalarAggExecutor : public KernelExecutorImpl { public: Status Init(KernelContext* ctx, KernelInitArgs args) override { - input_descrs_ = &args.inputs; + input_types_ = &args.inputs; options_ = args.options; return KernelExecutorImpl::Init(ctx, args); } @@ -1160,9 +1156,8 @@ class ScalarAggExecutor : public KernelExecutorImpl { private: Status Consume(const ExecBatch& batch) { // FIXME(ARROW-11840) don't merge *any* aggegates for every batch - ARROW_ASSIGN_OR_RAISE( - auto batch_state, - kernel_->init(kernel_ctx_, {kernel_, *input_descrs_, options_})); + ARROW_ASSIGN_OR_RAISE(auto batch_state, + kernel_->init(kernel_ctx_, {kernel_, *input_types_, options_})); if (batch_state == nullptr) { return Status::Invalid("ScalarAggregation requires non-null kernel state"); @@ -1177,7 +1172,7 @@ class ScalarAggExecutor : public KernelExecutorImpl { } std::unique_ptr batch_iterator_; - const std::vector* input_descrs_; + const std::vector* input_types_; const FunctionOptions* options_; }; @@ -1358,8 +1353,7 @@ Result> SelectionVector::FromMask( Result CallFunction(const std::string& func_name, const std::vector& args, const FunctionOptions* options, ExecContext* ctx) { if (ctx == nullptr) { - ExecContext default_ctx; - return CallFunction(func_name, args, options, &default_ctx); + ctx = default_exec_context(); } ARROW_ASSIGN_OR_RAISE(std::shared_ptr func, ctx->func_registry()->GetFunction(func_name)); @@ -1374,8 +1368,7 @@ Result CallFunction(const std::string& func_name, const std::vector CallFunction(const std::string& func_name, const ExecBatch& batch, const FunctionOptions* options, ExecContext* ctx) { if (ctx 
== nullptr) { - ExecContext default_ctx; - return CallFunction(func_name, batch, options, &default_ctx); + ctx = default_exec_context(); } ARROW_ASSIGN_OR_RAISE(std::shared_ptr func, ctx->func_registry()->GetFunction(func_name)); diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h index 8fd938ce299..f0b951dccb8 100644 --- a/cpp/src/arrow/compute/exec.h +++ b/cpp/src/arrow/compute/exec.h @@ -235,12 +235,11 @@ struct ARROW_EXPORT ExecBatch { ExecBatch Slice(int64_t offset, int64_t length) const; - /// \brief A convenience for returning the ValueDescr objects (types and - /// shapes) from the batch. - std::vector GetDescriptors() const { - std::vector result; + /// \brief A convenience for returning the types from the batch. + std::vector GetTypes() const { + std::vector result; for (const auto& value : this->values) { - result.emplace_back(value.descr()); + result.emplace_back(value.type()); } return result; } @@ -254,19 +253,16 @@ inline bool operator==(const ExecBatch& l, const ExecBatch& r) { return l.Equals inline bool operator!=(const ExecBatch& l, const ExecBatch& r) { return !l.Equals(r); } struct ExecValue { - enum Kind { ARRAY, SCALAR }; - Kind kind = ARRAY; ArraySpan array; - const Scalar* scalar; + const Scalar* scalar = NULLPTR; ExecValue(Scalar* scalar) // NOLINT implicit conversion - : kind(SCALAR), scalar(scalar) {} + : scalar(scalar) {} ExecValue(ArraySpan array) // NOLINT implicit conversion - : kind(ARRAY), array(std::move(array)) {} + : array(std::move(array)) {} - ExecValue(const ArrayData& array) // NOLINT implicit conversion - : kind(ARRAY) { + ExecValue(const ArrayData& array) { // NOLINT implicit conversion this->array.SetMembers(array); } @@ -278,31 +274,21 @@ struct ExecValue { int64_t length() const { return this->is_array() ? this->array.length : 1; } - bool is_array() const { return this->kind == ARRAY; } - bool is_scalar() const { return this->kind == SCALAR; } + bool is_array() const { return this->scalar == NULLPTR; } + bool is_scalar() const { return !this->is_array(); } void SetArray(const ArrayData& array) { - this->kind = ARRAY; this->array.SetMembers(array); + this->scalar = NULLPTR; } - void SetScalar(const Scalar* scalar) { - this->kind = SCALAR; - this->scalar = scalar; - } + void SetScalar(const Scalar* scalar) { this->scalar = scalar; } template const ExactType& scalar_as() const { return ::arrow::internal::checked_cast(*this->scalar); } - /// XXX: here only temporarily until type resolution can be cleaned - /// up to not use ValueDescr - ValueDescr descr() const { - ValueDescr::Shape shape = this->is_array() ? ValueDescr::ARRAY : ValueDescr::SCALAR; - return ValueDescr(const_cast(this->type())->shared_from_this(), shape); - } - /// XXX: here temporarily for compatibility with datum, see /// e.g. 
MakeStructExec in scalar_nested.cc int64_t null_count() const { @@ -314,7 +300,7 @@ struct ExecValue { } const DataType* type() const { - if (this->kind == ARRAY) { + if (this->is_array()) { return array.type; } else { return scalar->type.get(); @@ -324,29 +310,21 @@ struct ExecValue { struct ARROW_EXPORT ExecResult { // The default value of the variant is ArraySpan - // TODO(wesm): remove Scalar output modality in ARROW-16577 - util::Variant, std::shared_ptr> value; + util::Variant> value; int64_t length() const { if (this->is_array_span()) { return this->array_span()->length; - } else if (this->is_array_data()) { - return this->array_data()->length; } else { - // Should not reach here - return 1; + return this->array_data()->length; } } const DataType* type() const { - switch (this->value.index()) { - case 0: - return this->array_span()->type; - case 1: - return this->array_data()->type.get(); - default: - // scalar - return this->scalar()->type.get(); + if (this->is_array_span()) { + return this->array_span()->type; + } else { + return this->array_data()->type.get(); } } @@ -360,12 +338,6 @@ struct ARROW_EXPORT ExecResult { } bool is_array_data() const { return this->value.index() == 1; } - - const std::shared_ptr& scalar() const { - return util::get>(this->value); - } - - bool is_scalar() const { return this->value.index() == 2; } }; /// \brief A "lightweight" column batch object which contains no @@ -395,15 +367,6 @@ struct ARROW_EXPORT ExecSpan { } } - bool is_all_scalar() const { - for (const ExecValue& value : this->values) { - if (value.is_array()) { - return false; - } - } - return true; - } - /// \brief Return the value at the i-th index template inline const ExecValue& operator[](index_type i) const { @@ -412,7 +375,7 @@ struct ARROW_EXPORT ExecSpan { void AddOffset(int64_t offset) { for (ExecValue& value : values) { - if (value.kind == ExecValue::ARRAY) { + if (value.is_array()) { value.array.AddOffset(offset); } } @@ -420,7 +383,7 @@ struct ARROW_EXPORT ExecSpan { void SetOffset(int64_t offset) { for (ExecValue& value : values) { - if (value.kind == ExecValue::ARRAY) { + if (value.is_array()) { value.array.SetOffset(offset); } } @@ -429,12 +392,10 @@ struct ARROW_EXPORT ExecSpan { /// \brief A convenience for the number of values / arguments. 
int num_values() const { return static_cast(values.size()); } - // XXX: eliminate the need for ValueDescr; copied temporarily from - // ExecBatch - std::vector GetDescriptors() const { - std::vector result; + std::vector GetTypes() const { + std::vector result; for (const auto& value : this->values) { - result.emplace_back(value.descr()); + result.emplace_back(value.type()); } return result; } diff --git a/cpp/src/arrow/compute/exec/aggregate.cc b/cpp/src/arrow/compute/exec/aggregate.cc index 41b5bb75b66..5cb9a9c5633 100644 --- a/cpp/src/arrow/compute/exec/aggregate.cc +++ b/cpp/src/arrow/compute/exec/aggregate.cc @@ -31,20 +31,19 @@ namespace internal { Result> GetKernels( ExecContext* ctx, const std::vector& aggregates, - const std::vector& in_descrs) { - if (aggregates.size() != in_descrs.size()) { + const std::vector& in_types) { + if (aggregates.size() != in_types.size()) { return Status::Invalid(aggregates.size(), " aggregate functions were specified but ", - in_descrs.size(), " arguments were provided."); + in_types.size(), " arguments were provided."); } - std::vector kernels(in_descrs.size()); + std::vector kernels(in_types.size()); for (size_t i = 0; i < aggregates.size(); ++i) { ARROW_ASSIGN_OR_RAISE(auto function, ctx->func_registry()->GetFunction(aggregates[i].function)); - ARROW_ASSIGN_OR_RAISE( - const Kernel* kernel, - function->DispatchExact({in_descrs[i], ValueDescr::Array(uint32())})); + ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, + function->DispatchExact({in_types[i], uint32()})); kernels[i] = static_cast(kernel); } return kernels; @@ -52,7 +51,7 @@ Result> GetKernels( Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, const std::vector& in_descrs) { + const std::vector& aggregates, const std::vector& in_types) { std::vector> states(kernels.size()); for (size_t i = 0; i < aggregates.size(); ++i) { @@ -69,14 +68,13 @@ Result>> InitKernels( } KernelContext kernel_ctx{ctx}; - ARROW_ASSIGN_OR_RAISE( - states[i], - kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i], - { - in_descrs[i], - ValueDescr::Array(uint32()), - }, - options})); + ARROW_ASSIGN_OR_RAISE(states[i], + kernels[i]->init(&kernel_ctx, KernelInitArgs{kernels[i], + { + in_types[i], + uint32(), + }, + options})); } return std::move(states); @@ -86,19 +84,16 @@ Result ResolveKernels( const std::vector& aggregates, const std::vector& kernels, const std::vector>& states, ExecContext* ctx, - const std::vector& descrs) { - FieldVector fields(descrs.size()); + const std::vector& types) { + FieldVector fields(types.size()); for (size_t i = 0; i < kernels.size(); ++i) { KernelContext kernel_ctx{ctx}; kernel_ctx.SetState(states[i].get()); - ARROW_ASSIGN_OR_RAISE(auto descr, kernels[i]->signature->out_type().Resolve( - &kernel_ctx, { - descrs[i], - ValueDescr::Array(uint32()), - })); - fields[i] = field(aggregates[i].function, std::move(descr.type)); + ARROW_ASSIGN_OR_RAISE(auto type, kernels[i]->signature->out_type().Resolve( + &kernel_ctx, {types[i], uint32()})); + fields[i] = field(aggregates[i].function, type.GetSharedPtr()); } return fields; } @@ -122,18 +117,17 @@ Result GroupBy(const std::vector& arguments, const std::vectorparallelism()); for (auto& state : states) { - ARROW_ASSIGN_OR_RAISE(state, - InitKernels(kernels, ctx, aggregates, argument_descrs)); + ARROW_ASSIGN_OR_RAISE(state, InitKernels(kernels, ctx, aggregates, argument_types)); } ARROW_ASSIGN_OR_RAISE( - out_fields, ResolveKernels(aggregates, kernels, states[0], ctx, argument_descrs)); + 
out_fields, ResolveKernels(aggregates, kernels, states[0], ctx, argument_types)); ARROW_ASSIGN_OR_RAISE( argument_batch_iterator, @@ -142,19 +136,19 @@ Result GroupBy(const std::vector& arguments, const std::vector> groupers(task_group->parallelism()); for (auto& grouper : groupers) { - ARROW_ASSIGN_OR_RAISE(grouper, Grouper::Make(key_descrs, ctx)); + ARROW_ASSIGN_OR_RAISE(grouper, Grouper::Make(key_types, ctx)); } std::mutex mutex; std::unordered_map thread_ids; int i = 0; - for (ValueDescr& key_descr : key_descrs) { - out_fields.push_back(field("key_" + std::to_string(i++), std::move(key_descr.type))); + for (const TypeHolder& key_type : key_types) { + out_fields.push_back(field("key_" + std::to_string(i++), key_type.GetSharedPtr())); } ARROW_ASSIGN_OR_RAISE( diff --git a/cpp/src/arrow/compute/exec/aggregate.h b/cpp/src/arrow/compute/exec/aggregate.h index 753b0a8c47e..72990f3b6e7 100644 --- a/cpp/src/arrow/compute/exec/aggregate.h +++ b/cpp/src/arrow/compute/exec/aggregate.h @@ -42,17 +42,17 @@ Result GroupBy(const std::vector& arguments, const std::vector> GetKernels( ExecContext* ctx, const std::vector& aggregates, - const std::vector& in_descrs); + const std::vector& in_types); Result>> InitKernels( const std::vector& kernels, ExecContext* ctx, - const std::vector& aggregates, const std::vector& in_descrs); + const std::vector& aggregates, const std::vector& in_types); Result ResolveKernels( const std::vector& aggregates, const std::vector& kernels, const std::vector>& states, ExecContext* ctx, - const std::vector& descrs); + const std::vector& in_types); } // namespace internal } // namespace compute diff --git a/cpp/src/arrow/compute/exec/aggregate_node.cc b/cpp/src/arrow/compute/exec/aggregate_node.cc index 8c7899c41ec..0131319be3b 100644 --- a/cpp/src/arrow/compute/exec/aggregate_node.cc +++ b/cpp/src/arrow/compute/exec/aggregate_node.cc @@ -104,8 +104,7 @@ class ScalarAggregateNode : public ExecNode { aggregates[i].function); } - auto in_type = ValueDescr::Array(input_schema.field(target_field_ids[i])->type()); - + TypeHolder in_type(input_schema.field(target_field_ids[i])->type().get()); ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, function->DispatchExact({in_type})); kernels[i] = static_cast(kernel); @@ -125,10 +124,10 @@ class ScalarAggregateNode : public ExecNode { // pick one to resolve the kernel signature kernel_ctx.SetState(states[i][0].get()); - ARROW_ASSIGN_OR_RAISE( - auto descr, kernels[i]->signature->out_type().Resolve(&kernel_ctx, {in_type})); + ARROW_ASSIGN_OR_RAISE(auto out_type, kernels[i]->signature->out_type().Resolve( + &kernel_ctx, {in_type})); - fields[i] = field(aggregate_options.aggregates[i].name, std::move(descr.type)); + fields[i] = field(aggregate_options.aggregates[i].name, out_type.GetSharedPtr()); } return plan->EmplaceNode( @@ -313,25 +312,24 @@ class GroupByNode : public ExecNode { } // Build vector of aggregate source field data types - std::vector agg_src_descrs(aggs.size()); + std::vector agg_src_types(aggs.size()); for (size_t i = 0; i < aggs.size(); ++i) { auto agg_src_field_id = agg_src_field_ids[i]; - agg_src_descrs[i] = - ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY); + agg_src_types[i] = input_schema->field(agg_src_field_id)->type().get(); } auto ctx = input->plan()->exec_context(); // Construct aggregates ARROW_ASSIGN_OR_RAISE(auto agg_kernels, - internal::GetKernels(ctx, aggs, agg_src_descrs)); + internal::GetKernels(ctx, aggs, agg_src_types)); ARROW_ASSIGN_OR_RAISE(auto agg_states, - 
internal::InitKernels(agg_kernels, ctx, aggs, agg_src_descrs)); + internal::InitKernels(agg_kernels, ctx, aggs, agg_src_types)); ARROW_ASSIGN_OR_RAISE( FieldVector agg_result_fields, - internal::ResolveKernels(aggs, agg_kernels, agg_states, ctx, agg_src_descrs)); + internal::ResolveKernels(aggs, agg_kernels, agg_states, ctx, agg_src_types)); // Build field vector for output schema FieldVector output_fields{keys.size() + aggs.size()}; @@ -621,26 +619,24 @@ class GroupByNode : public ExecNode { if (state->grouper != nullptr) return Status::OK(); // Build vector of key field data types - std::vector key_descrs(key_field_ids_.size()); + std::vector key_types(key_field_ids_.size()); for (size_t i = 0; i < key_field_ids_.size(); ++i) { auto key_field_id = key_field_ids_[i]; - key_descrs[i] = ValueDescr(input_schema->field(key_field_id)->type()); + key_types[i] = input_schema->field(key_field_id)->type().get(); } // Construct grouper - ARROW_ASSIGN_OR_RAISE(state->grouper, Grouper::Make(key_descrs, ctx_)); + ARROW_ASSIGN_OR_RAISE(state->grouper, Grouper::Make(key_types, ctx_)); // Build vector of aggregate source field data types - std::vector agg_src_descrs(agg_kernels_.size()); + std::vector agg_src_types(agg_kernels_.size()); for (size_t i = 0; i < agg_kernels_.size(); ++i) { auto agg_src_field_id = agg_src_field_ids_[i]; - agg_src_descrs[i] = - ValueDescr(input_schema->field(agg_src_field_id)->type(), ValueDescr::ARRAY); + agg_src_types[i] = input_schema->field(agg_src_field_id)->type().get(); } - ARROW_ASSIGN_OR_RAISE( - state->agg_states, - internal::InitKernels(agg_kernels_, ctx_, aggs_, agg_src_descrs)); + ARROW_ASSIGN_OR_RAISE(state->agg_states, internal::InitKernels(agg_kernels_, ctx_, + aggs_, agg_src_types)); return Status::OK(); } diff --git a/cpp/src/arrow/compute/exec/expression.cc b/cpp/src/arrow/compute/exec/expression.cc index b796f5cda3b..c890b3c5935 100644 --- a/cpp/src/arrow/compute/exec/expression.cc +++ b/cpp/src/arrow/compute/exec/expression.cc @@ -64,7 +64,7 @@ Expression::Expression(Parameter parameter) Expression literal(Datum lit) { return Expression(std::move(lit)); } Expression field_ref(FieldRef ref) { - return Expression(Expression::Parameter{std::move(ref), ValueDescr{}, {-1}}); + return Expression(Expression::Parameter{std::move(ref), TypeHolder{}, {-1}}); } Expression call(std::string function, std::vector arguments, @@ -93,36 +93,18 @@ const Expression::Call* Expression::call() const { return util::get_if(impl_.get()); } -ValueDescr Expression::descr() const { - if (impl_ == nullptr) return {}; +const DataType* Expression::type() const { + if (impl_ == nullptr) return nullptr; - if (auto lit = literal()) { - return lit->descr(); - } - - if (auto parameter = this->parameter()) { - return parameter->descr; - } - - return CallNotNull(*this)->descr; -} - -// This is a module-global singleton to avoid synchronization costs of a -// function-static singleton. 
-static const std::shared_ptr kNoType; - -const std::shared_ptr& Expression::type() const { - if (impl_ == nullptr) return kNoType; - - if (auto lit = literal()) { - return lit->type(); + if (const Datum* lit = literal()) { + return lit->type().get(); } - if (auto parameter = this->parameter()) { - return parameter->descr.type; + if (const Parameter* parameter = this->parameter()) { + return parameter->type.type; } - return CallNotNull(*this)->descr.type; + return CallNotNull(*this)->type.type; } namespace { @@ -276,7 +258,7 @@ size_t Expression::hash() const { bool Expression::IsBound() const { if (type() == nullptr) return false; - if (auto call = this->call()) { + if (const Call* call = this->call()) { if (call->kernel == nullptr) return false; for (const Expression& arg : call->arguments) { @@ -338,7 +320,7 @@ util::optional GetNullHandling( } // namespace bool Expression::IsSatisfiable() const { - if (!type()) return true; + if (type() == nullptr) return true; if (type()->id() != Type::BOOL) return true; if (auto lit = literal()) { @@ -382,25 +364,20 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ DCHECK(std::all_of(call.arguments.begin(), call.arguments.end(), [](const Expression& argument) { return argument.IsBound(); })); - auto descrs = GetDescriptors(call.arguments); + std::vector types = GetTypes(call.arguments); ARROW_ASSIGN_OR_RAISE(call.function, GetFunction(call, exec_context)); if (!insert_implicit_casts) { - ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchExact(descrs)); + ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchExact(types)); } else { - ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&descrs)); + ARROW_ASSIGN_OR_RAISE(call.kernel, call.function->DispatchBest(&types)); - for (size_t i = 0; i < descrs.size(); ++i) { - if (descrs[i] == call.arguments[i].descr()) continue; + for (size_t i = 0; i < types.size(); ++i) { + if (types[i] == call.arguments[i].type()) continue; - if (descrs[i].shape != call.arguments[i].descr().shape) { - return Status::NotImplemented( - "Automatic broadcasting of scalars arguments to arrays in ", - Expression(std::move(call)).ToString()); - } - - if (auto lit = call.arguments[i].literal()) { - ARROW_ASSIGN_OR_RAISE(Datum new_lit, compute::Cast(*lit, descrs[i].type)); + if (const Datum* lit = call.arguments[i].literal()) { + ARROW_ASSIGN_OR_RAISE(Datum new_lit, + compute::Cast(*lit, types[i].GetSharedPtr())); call.arguments[i] = literal(std::move(new_lit)); continue; } @@ -409,8 +386,10 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ Expression::Call implicit_cast; implicit_cast.function_name = "cast"; implicit_cast.arguments = {std::move(call.arguments[i])}; + + // TODO(wesm): Use TypeHolder in options implicit_cast.options = std::make_shared( - compute::CastOptions::Safe(descrs[i].type)); + compute::CastOptions::Safe(types[i].GetSharedPtr())); ARROW_ASSIGN_OR_RAISE( call.arguments[i], @@ -425,43 +404,41 @@ Result BindNonRecursive(Expression::Call call, bool insert_implicit_ call.options ? 
call.options.get() : call.function->default_options(); ARROW_ASSIGN_OR_RAISE( call.kernel_state, - call.kernel->init(&kernel_context, {call.kernel, descrs, options})); + call.kernel->init(&kernel_context, {call.kernel, types, options})); kernel_context.SetState(call.kernel_state.get()); } ARROW_ASSIGN_OR_RAISE( - call.descr, call.kernel->signature->out_type().Resolve(&kernel_context, descrs)); + call.type, call.kernel->signature->out_type().Resolve(&kernel_context, types)); return Expression(std::move(call)); } template Result BindImpl(Expression expr, const TypeOrSchema& in, - ValueDescr::Shape shape, compute::ExecContext* exec_context) { + compute::ExecContext* exec_context) { if (exec_context == nullptr) { compute::ExecContext exec_context; - return BindImpl(std::move(expr), in, shape, &exec_context); + return BindImpl(std::move(expr), in, &exec_context); } if (expr.literal()) return expr; - if (auto ref = expr.field_ref()) { - ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in)); + if (const FieldRef* ref = expr.field_ref()) { + ARROW_ASSIGN_OR_RAISE(FieldPath path, ref->FindOne(in)); - auto bound = *expr.parameter(); - bound.indices.resize(path.indices().size()); - std::copy(path.indices().begin(), path.indices().end(), bound.indices.begin()); + Expression::Parameter param = *expr.parameter(); + param.indices.resize(path.indices().size()); + std::copy(path.indices().begin(), path.indices().end(), param.indices.begin()); ARROW_ASSIGN_OR_RAISE(auto field, path.Get(in)); - bound.descr.type = field->type(); - bound.descr.shape = shape; - return Expression{std::move(bound)}; + param.type = field->type(); + return Expression{std::move(param)}; } auto call = *CallNotNull(expr); for (auto& argument : call.arguments) { - ARROW_ASSIGN_OR_RAISE(argument, - BindImpl(std::move(argument), in, shape, exec_context)); + ARROW_ASSIGN_OR_RAISE(argument, BindImpl(std::move(argument), in, exec_context)); } return BindNonRecursive(std::move(call), /*insert_implicit_casts=*/true, exec_context); @@ -469,14 +446,14 @@ Result BindImpl(Expression expr, const TypeOrSchema& in, } // namespace -Result Expression::Bind(const ValueDescr& in, +Result Expression::Bind(const TypeHolder& in, compute::ExecContext* exec_context) const { - return BindImpl(*this, *in.type, in.shape, exec_context); + return BindImpl(*this, *in.type, exec_context); } Result Expression::Bind(const Schema& in_schema, compute::ExecContext* exec_context) const { - return BindImpl(*this, in_schema, ValueDescr::ARRAY, exec_context); + return BindImpl(*this, in_schema, exec_context); } Result MakeExecBatch(const Schema& full_schema, const Datum& partial) { @@ -558,7 +535,7 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i if (auto lit = expr.literal()) return *lit; if (auto param = expr.parameter()) { - if (param->descr.type->id() == Type::NA) { + if (param->type.id() == Type::NA) { return MakeNullScalar(null()); } @@ -569,10 +546,10 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i ARROW_ASSIGN_OR_RAISE( field, compute::CallFunction("struct_field", {std::move(field)}, &options)); } - if (!field.type()->Equals(param->descr.type)) { + if (!field.type()->Equals(*param->type.type)) { return Status::Invalid("Referenced field ", expr.ToString(), " was ", field.type()->ToString(), " but should have been ", - param->descr.type->ToString()); + param->type.ToString()); } return field; @@ -596,10 +573,10 @@ Result ExecuteScalarExpression(const Expression& expr, const ExecBatch& i compute::KernelContext 
kernel_context(exec_context, call->kernel); kernel_context.SetState(call->kernel_state.get()); - auto kernel = call->kernel; - auto descrs = GetDescriptors(arguments); + const Kernel* kernel = call->kernel; + std::vector types = GetTypes(arguments); auto options = call->options.get(); - RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, descrs, options})); + RETURN_NOT_OK(executor->Init(&kernel_context, {kernel, types, options})); compute::detail::DatumAccumulator listener; RETURN_NOT_OK(executor->Execute( @@ -683,16 +660,16 @@ Result FoldConstants(Expression expr) { if (GetNullHandling(*call) == compute::NullHandling::INTERSECTION) { // kernels which always produce intersected validity can be resolved // to null *now* if any of their inputs is a null literal - if (!call->descr.type) { + if (!call->type.type) { return Status::Invalid("Cannot fold constants for unbound expression ", expr.ToString()); } - for (const auto& argument : call->arguments) { + for (const Expression& argument : call->arguments) { if (argument.IsNullLiteral()) { - if (argument.type()->Equals(*call->descr.type)) { + if (argument.type()->Equals(*call->type.type)) { return argument; } else { - return literal(MakeNullScalar(call->descr.type)); + return literal(MakeNullScalar(call->type.GetSharedPtr())); } } } @@ -815,7 +792,7 @@ Result ReplaceFieldsWithKnownValues(const KnownFieldValues& known_va auto it = known_values.map.find(*ref); if (it != known_values.map.end()) { Datum lit = it->second; - if (lit.descr() == expr.descr()) return literal(std::move(lit)); + if (lit.type()->Equals(*expr.type())) return literal(std::move(lit)); // type mismatch, try casting the known value to the correct type if (expr.type()->id() == Type::DICTIONARY && @@ -836,7 +813,7 @@ Result ReplaceFieldsWithKnownValues(const KnownFieldValues& known_va } } - ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, expr.type())); + ARROW_ASSIGN_OR_RAISE(lit, compute::Cast(lit, expr.type()->GetSharedPtr())); return literal(std::move(lit)); } } diff --git a/cpp/src/arrow/compute/exec/expression.h b/cpp/src/arrow/compute/exec/expression.h index a1765d0fcca..e9026961aa9 100644 --- a/cpp/src/arrow/compute/exec/expression.h +++ b/cpp/src/arrow/compute/exec/expression.h @@ -55,7 +55,7 @@ class ARROW_EXPORT Expression { std::shared_ptr function; const Kernel* kernel = NULLPTR; std::shared_ptr kernel_state; - ValueDescr descr; + TypeHolder type; void ComputeHash(); }; @@ -70,7 +70,7 @@ class ARROW_EXPORT Expression { /// Bind this expression to the given input type, looking up Kernels and field types. /// Some expression simplification may be performed and implicit casts will be inserted. /// Any state necessary for execution will be initialized and returned. - Result Bind(const ValueDescr& in, ExecContext* = NULLPTR) const; + Result Bind(const TypeHolder& in, ExecContext* = NULLPTR) const; Result Bind(const Schema& in_schema, ExecContext* = NULLPTR) const; // XXX someday @@ -82,8 +82,8 @@ class ARROW_EXPORT Expression { // Result CloneState() const; // Status SetState(ExpressionState); - /// Return true if all an expression's field references have explicit ValueDescr and all - /// of its functions' kernels are looked up. + /// Return true if all an expression's field references have explicit types + /// and all of its functions' kernels are looked up. 
bool IsBound() const; /// Return true if this expression is composed only of Scalar literals, field @@ -107,9 +107,8 @@ class ARROW_EXPORT Expression { /// Access a FieldRef or return nullptr if this expression is not a field_ref const FieldRef* field_ref() const; - /// The type and shape to which this expression will evaluate - ValueDescr descr() const; - const std::shared_ptr& type() const; + /// The type to which this expression will evaluate + const DataType* type() const; // XXX someday // NullGeneralization::type nullable() const; @@ -117,7 +116,7 @@ class ARROW_EXPORT Expression { FieldRef ref; // post-bind properties - ValueDescr descr; + TypeHolder type; ::arrow::internal::SmallVector indices; }; const Parameter* parameter() const; diff --git a/cpp/src/arrow/compute/exec/expression_internal.h b/cpp/src/arrow/compute/exec/expression_internal.h index f8c686d2c81..7490d116c54 100644 --- a/cpp/src/arrow/compute/exec/expression_internal.h +++ b/cpp/src/arrow/compute/exec/expression_internal.h @@ -23,6 +23,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" #include "arrow/compute/registry.h" #include "arrow/record_batch.h" #include "arrow/table.h" @@ -31,6 +32,8 @@ namespace arrow { namespace compute { +using internal::GetCastFunction; + struct KnownFieldValues { std::unordered_map map; }; @@ -41,21 +44,21 @@ inline const Expression::Call* CallNotNull(const Expression& expr) { return call; } -inline std::vector GetDescriptors(const std::vector& exprs) { - std::vector descrs(exprs.size()); +inline std::vector GetTypes(const std::vector& exprs) { + std::vector types(exprs.size()); for (size_t i = 0; i < exprs.size(); ++i) { DCHECK(exprs[i].IsBound()); - descrs[i] = exprs[i].descr(); + types[i] = exprs[i].type(); } - return descrs; + return types; } -inline std::vector GetDescriptors(const std::vector& values) { - std::vector descrs(values.size()); +inline std::vector GetTypes(const std::vector& values) { + std::vector types(values.size()); for (size_t i = 0; i < values.size(); ++i) { - descrs[i] = values[i].descr(); + types[i] = values[i].type(); } - return descrs; + return types; } struct Comparison { @@ -281,7 +284,7 @@ inline Result> GetFunction( // XXX this special case is strange; why not make "cast" a ScalarFunction? const auto& to_type = ::arrow::internal::checked_cast(*call.options).to_type; - return compute::GetCastFunction(to_type); + return GetCastFunction(to_type); } /// Modify an Expression with pre-order and post-order visitation. 
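(Editorial sketch, not part of the patch: the expression_internal.h hunk above replaces the ValueDescr-based GetDescriptors() helpers with GetTypes() overloads that collect TypeHolder values and feed them straight to kernel dispatch. The fragment below is a minimal illustration of that caller-side flow, assuming only the post-refactor APIs shown in this series -- TypeHolder, Function::DispatchExact, the function registry -- and using the stock "add" function purely as an example name.)

#include <memory>
#include <vector>

#include "arrow/compute/api.h"
#include "arrow/compute/registry.h"
#include "arrow/result.h"
#include "arrow/status.h"

arrow::Status DispatchWithTypeHolders(arrow::compute::ExecContext* ctx) {
  namespace cp = arrow::compute;
  // Look up a registered function; "add" is only an illustrative choice.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<cp::Function> func,
                        ctx->func_registry()->GetFunction("add"));
  // TypeHolder is constructible from std::shared_ptr<DataType> (and from a
  // raw const DataType*), so plain types stand in for the old
  // ValueDescr::Array(...) / ValueDescr::Scalar(...) wrappers.
  std::vector<cp::TypeHolder> types = {arrow::int32(), arrow::int32()};
  ARROW_ASSIGN_OR_RAISE(const cp::Kernel* kernel, func->DispatchExact(types));
  // The selected kernel would then be initialized and executed as in the
  // exec.cc changes earlier in this patch.
  (void)kernel;
  return arrow::Status::OK();
}
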
diff --git a/cpp/src/arrow/compute/exec/expression_test.cc b/cpp/src/arrow/compute/exec/expression_test.cc index 95adb1652eb..b4466d827eb 100644 --- a/cpp/src/arrow/compute/exec/expression_test.cc +++ b/cpp/src/arrow/compute/exec/expression_test.cc @@ -493,8 +493,8 @@ TEST(Expression, BindLiteral) { Datum(ArrayFromJSON(int32(), "[1,2,3]")), }) { // literals are always considered bound - auto expr = literal(dat); - EXPECT_EQ(expr.descr(), dat.descr()); + Expression expr = literal(dat); + EXPECT_TRUE(dat.type()->Equals(*expr.type())); EXPECT_TRUE(expr.IsBound()); } } @@ -518,13 +518,13 @@ void ExpectBindsTo(Expression expr, util::optional expected, } TEST(Expression, BindFieldRef) { - // an unbound field_ref does not have the output ValueDescr set + // an unbound field_ref does not have the output type set auto expr = field_ref("alpha"); - EXPECT_EQ(expr.descr(), ValueDescr{}); + EXPECT_EQ(expr.type(), nullptr); EXPECT_FALSE(expr.IsBound()); ExpectBindsTo(field_ref("i32"), no_change, &expr); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); // if the field is not found, an error will be raised ASSERT_RAISES(Invalid, field_ref("no such field").Bind(*kBoringSchema)); @@ -541,11 +541,11 @@ TEST(Expression, BindNestedFieldRef) { ExpectBindsTo(field_ref(FieldRef("a", "b")), no_change, &expr, schema); EXPECT_TRUE(expr.IsBound()); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); ExpectBindsTo(field_ref(FieldRef(FieldPath({0, 0}))), no_change, &expr, schema); EXPECT_TRUE(expr.IsBound()); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); ASSERT_RAISES(Invalid, field_ref(FieldPath({0, 1})).Bind(schema)); ASSERT_RAISES(Invalid, field_ref(FieldRef("a", "b")) @@ -558,7 +558,7 @@ TEST(Expression, BindCall) { EXPECT_FALSE(expr.IsBound()); ExpectBindsTo(expr, no_change, &expr); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); ExpectBindsTo(call("add", {field_ref("f32"), literal(3)}), call("add", {field_ref("f32"), literal(3.0F)})); @@ -607,7 +607,7 @@ TEST(Expression, BindNestedCall) { ASSERT_OK_AND_ASSIGN(expr, expr.Bind(Schema({field("a", int32()), field("b", int32()), field("c", int32()), field("d", int32())}))); - EXPECT_EQ(expr.descr(), ValueDescr::Array(int32())); + EXPECT_TRUE(expr.type()->Equals(*int32())); EXPECT_TRUE(expr.IsBound()); } @@ -615,7 +615,7 @@ TEST(Expression, ExecuteFieldRef) { auto ExpectRefIs = [](FieldRef ref, Datum in, Datum expected) { auto expr = field_ref(ref); - ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr())); + ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.type())); ASSERT_OK_AND_ASSIGN(Datum actual, ExecuteScalarExpression(expr, Schema(in.type()->fields()), in)); @@ -716,8 +716,8 @@ Result NaiveExecuteScalarExpression(const Expression& expr, const Datum& compute::ExecContext exec_context; ARROW_ASSIGN_OR_RAISE(auto function, GetFunction(*call, &exec_context)); - auto descrs = GetDescriptors(call->arguments); - ARROW_ASSIGN_OR_RAISE(auto expected_kernel, function->DispatchExact(descrs)); + std::vector types = GetTypes(call->arguments); + ARROW_ASSIGN_OR_RAISE(auto expected_kernel, function->DispatchExact(types)); EXPECT_EQ(call->kernel, expected_kernel); return function->Execute(arguments, call->options.get(), &exec_context); @@ -726,7 +726,7 @@ Result NaiveExecuteScalarExpression(const Expression& expr, const Datum& void ExpectExecute(Expression expr, Datum in, Datum* 
actual_out = NULLPTR) { std::shared_ptr schm; if (in.is_value()) { - ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.descr())); + ASSERT_OK_AND_ASSIGN(expr, expr.Bind(in.type())); schm = schema(in.type()->fields()); } else { ASSERT_OK_AND_ASSIGN(expr, expr.Bind(*in.schema())); diff --git a/cpp/src/arrow/compute/exec/hash_join.cc b/cpp/src/arrow/compute/exec/hash_join.cc index a145863e597..a376fb5f57b 100644 --- a/cpp/src/arrow/compute/exec/hash_join.cc +++ b/cpp/src/arrow/compute/exec/hash_join.cc @@ -84,13 +84,11 @@ class HashJoinBasicImpl : public HashJoinImpl { private: void InitEncoder(int side, HashJoinProjection projection_handle, RowEncoder* encoder) { - std::vector data_types; + std::vector data_types; int num_cols = schema_mgr_->proj_maps[side].num_cols(projection_handle); data_types.resize(num_cols); for (int icol = 0; icol < num_cols; ++icol) { - data_types[icol] = - ValueDescr(schema_mgr_->proj_maps[side].data_type(projection_handle, icol), - ValueDescr::ARRAY); + data_types[icol] = schema_mgr_->proj_maps[side].data_type(projection_handle, icol); } encoder->Init(data_types, ctx_); encoder->Clear(); diff --git a/cpp/src/arrow/compute/exec/hash_join_dict.cc b/cpp/src/arrow/compute/exec/hash_join_dict.cc index 731a5662d7d..560b0ea8d4d 100644 --- a/cpp/src/arrow/compute/exec/hash_join_dict.cc +++ b/cpp/src/arrow/compute/exec/hash_join_dict.cc @@ -224,8 +224,8 @@ Status HashJoinDictBuild::Init(ExecContext* ctx, std::shared_ptr dictiona // Initialize encoder internal::RowEncoder encoder; - std::vector encoder_types; - encoder_types.emplace_back(value_type_, ValueDescr::ARRAY); + std::vector encoder_types; + encoder_types.emplace_back(value_type_); encoder.Init(encoder_types, ctx); // Encode all dictionary values @@ -285,8 +285,7 @@ Result> HashJoinDictBuild::RemapInputValues( // Initialize encoder // internal::RowEncoder encoder; - std::vector encoder_types; - encoder_types.emplace_back(value_type_, ValueDescr::ARRAY); + std::vector encoder_types = {value_type_}; encoder.Init(encoder_types, ctx); // Encode all @@ -422,8 +421,7 @@ Result> HashJoinDictProbe::RemapInput( remapped_ids_, opt_build_side->RemapInputValues(ctx, Datum(dict->data()), dict->length())); } else { - std::vector encoder_types; - encoder_types.emplace_back(dict_type.value_type(), ValueDescr::ARRAY); + std::vector encoder_types = {dict_type.value_type()}; encoder_.Init(encoder_types, ctx); RETURN_NOT_OK( encoder_.EncodeAndAppend(ExecSpan({*dict->data()}, dict->length()))); @@ -516,14 +514,14 @@ void HashJoinDictBuildMulti::InitEncoder( const SchemaProjectionMaps& proj_map, RowEncoder* encoder, ExecContext* ctx) { int num_cols = proj_map.num_cols(HashJoinProjection::KEY); - std::vector data_types(num_cols); + std::vector data_types(num_cols); for (int icol = 0; icol < num_cols; ++icol) { std::shared_ptr data_type = proj_map.data_type(HashJoinProjection::KEY, icol); if (HashJoinDictBuild::KeyNeedsProcessing(data_type)) { data_type = HashJoinDictBuild::DataTypeAfterRemapping(); } - data_types[icol] = ValueDescr(data_type, ValueDescr::ARRAY); + data_types[icol] = data_type; } encoder->Init(data_types, ctx); } @@ -610,7 +608,7 @@ void HashJoinDictProbeMulti::InitEncoder( const SchemaProjectionMaps& proj_map_build, RowEncoder* encoder, ExecContext* ctx) { int num_cols = proj_map_probe.num_cols(HashJoinProjection::KEY); - std::vector data_types(num_cols); + std::vector data_types(num_cols); for (int icol = 0; icol < num_cols; ++icol) { std::shared_ptr data_type = proj_map_probe.data_type(HashJoinProjection::KEY, icol); @@ -619,7 
+617,7 @@ void HashJoinDictProbeMulti::InitEncoder( if (HashJoinDictProbe::KeyNeedsProcessing(data_type, build_data_type)) { data_type = HashJoinDictProbe::DataTypeAfterRemapping(build_data_type); } - data_types[icol] = ValueDescr(data_type, ValueDescr::ARRAY); + data_types[icol] = data_type; } encoder->Init(data_types, ctx); } diff --git a/cpp/src/arrow/compute/exec/hash_join_node_test.cc b/cpp/src/arrow/compute/exec/hash_join_node_test.cc index 46600a96da3..9a3c7342788 100644 --- a/cpp/src/arrow/compute/exec/hash_join_node_test.cc +++ b/cpp/src/arrow/compute/exec/hash_join_node_test.cc @@ -44,13 +44,13 @@ BatchesWithSchema GenerateBatchesFromString( const std::vector& json_strings, int multiplicity = 1) { BatchesWithSchema out_batches{{}, schema}; - std::vector descrs; + std::vector types; for (auto&& field : schema->fields()) { - descrs.emplace_back(field->type()); + types.emplace_back(field->type()); } for (auto&& s : json_strings) { - out_batches.batches.push_back(ExecBatchFromJSON(descrs, s)); + out_batches.batches.push_back(ExecBatchFromJSON(types, s)); } size_t batch_count = out_batches.batches.size(); @@ -473,7 +473,7 @@ void TakeUsingVector(ExecContext* ctx, const std::vector> } } -// Generate random arrays given list of data type descriptions and null probabilities. +// Generate random arrays given list of data types and null probabilities. // Make sure that all generated records are unique. // The actual number of generated records may be lower than desired because duplicates // will be removed without replacement. @@ -485,12 +485,12 @@ std::vector> GenRandomUniqueRecords( GenRandomRecords(rng, data_types.data_types, num_desired); ExecContext* ctx = default_exec_context(); - std::vector val_descrs; + std::vector val_types; for (size_t i = 0; i < result.size(); ++i) { - val_descrs.push_back(ValueDescr(result[i]->type(), ValueDescr::ARRAY)); + val_types.push_back(result[i]->type()); } internal::RowEncoder encoder; - encoder.Init(val_descrs, ctx); + encoder.Init(val_types, ctx); ExecBatch batch({}, num_desired); batch.values.resize(result.size()); for (size_t i = 0; i < result.size(); ++i) { diff --git a/cpp/src/arrow/compute/exec/plan_test.cc b/cpp/src/arrow/compute/exec/plan_test.cc index 9efa6623e5a..f67d541e1ea 100644 --- a/cpp/src/arrow/compute/exec/plan_test.cc +++ b/cpp/src/arrow/compute/exec/plan_test.cc @@ -1133,12 +1133,11 @@ TEST(ExecPlanExecution, SourceScalarAggSink) { }) .AddToPlan(plan.get())); - ASSERT_THAT( - StartAndCollect(plan.get(), sink_gen), - Finishes(ResultWith(UnorderedElementsAreArray({ - ExecBatchFromJSON({ValueDescr::Scalar(int64()), ValueDescr::Scalar(boolean())}, - "[[22, true]]"), - })))); + ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), + Finishes(ResultWith(UnorderedElementsAreArray({ + ExecBatchFromJSON({int64(), boolean()}, + {ArgShape::SCALAR, ArgShape::SCALAR}, "[[22, true]]"), + })))); } TEST(ExecPlanExecution, AggregationPreservesOptions) { @@ -1168,7 +1167,7 @@ TEST(ExecPlanExecution, AggregationPreservesOptions) { ASSERT_THAT(StartAndCollect(plan.get(), sink_gen), Finishes(ResultWith(UnorderedElementsAreArray({ - ExecBatchFromJSON({ValueDescr::Array(float64())}, "[[5.5]]"), + ExecBatchFromJSON({float64()}, "[[5.5]]"), })))); } { @@ -1209,7 +1208,7 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { BatchesWithSchema scalar_data; scalar_data.batches = { - ExecBatchFromJSON({ValueDescr::Scalar(int32()), ValueDescr::Scalar(boolean())}, + ExecBatchFromJSON({int32(), boolean()}, {ArgShape::SCALAR, ArgShape::SCALAR}, "[[5, false], [5, 
false], [5, false]]"), ExecBatchFromJSON({int32(), boolean()}, "[[5, true], [6, false], [7, true]]")}; scalar_data.schema = schema({field("a", int32()), field("b", boolean())}); @@ -1239,11 +1238,11 @@ TEST(ExecPlanExecution, ScalarSourceScalarAggSink) { StartAndCollect(plan.get(), sink_gen), Finishes(ResultWith(UnorderedElementsAreArray({ ExecBatchFromJSON( - {ValueDescr::Scalar(boolean()), ValueDescr::Scalar(boolean()), - ValueDescr::Scalar(int64()), ValueDescr::Scalar(float64()), - ValueDescr::Scalar(int64()), ValueDescr::Scalar(float64()), - ValueDescr::Scalar(int64()), ValueDescr::Array(float64()), - ValueDescr::Scalar(float64())}, + {boolean(), boolean(), int64(), float64(), int64(), float64(), int64(), + float64(), float64()}, + {ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, + ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::ARRAY, + ArgShape::SCALAR}, R"([[false, true, 6, 5.5, 26250, 0.7637626158259734, 33, 5.0, 0.5833333333333334]])"), })))); } @@ -1255,9 +1254,9 @@ TEST(ExecPlanExecution, ScalarSourceGroupedSum) { BatchesWithSchema scalar_data; scalar_data.batches = { - ExecBatchFromJSON({int32(), ValueDescr::Scalar(boolean())}, + ExecBatchFromJSON({int32(), boolean()}, {ArgShape::ARRAY, ArgShape::SCALAR}, "[[5, false], [6, false], [7, false]]"), - ExecBatchFromJSON({int32(), ValueDescr::Scalar(boolean())}, + ExecBatchFromJSON({int32(), boolean()}, {ArgShape::ARRAY, ArgShape::SCALAR}, "[[1, true], [2, true], [3, true]]"), }; scalar_data.schema = schema({field("a", int32()), field("b", boolean())}); diff --git a/cpp/src/arrow/compute/exec/project_node.cc b/cpp/src/arrow/compute/exec/project_node.cc index cad8d7c45ae..de01899b485 100644 --- a/cpp/src/arrow/compute/exec/project_node.cc +++ b/cpp/src/arrow/compute/exec/project_node.cc @@ -67,7 +67,7 @@ class ProjectNode : public MapNode { ARROW_ASSIGN_OR_RAISE( expr, expr.Bind(*inputs[0]->output_schema(), plan->exec_context())); } - fields[i] = field(std::move(names[i]), expr.type()); + fields[i] = field(std::move(names[i]), expr.type()->GetSharedPtr()); ++i; } return plan->EmplaceNode(plan, std::move(inputs), @@ -82,7 +82,7 @@ class ProjectNode : public MapNode { for (size_t i = 0; i < exprs_.size(); ++i) { util::tracing::Span span; START_COMPUTE_SPAN(span, "Project", - {{"project.descr", exprs_[i].descr().ToString()}, + {{"project.type", exprs_[i].type().ToString()}, {"project.length", target.length}, {"project.expression", exprs_[i].ToString()}}); ARROW_ASSIGN_OR_RAISE(Expression simplified_expr, diff --git a/cpp/src/arrow/compute/exec/test_util.cc b/cpp/src/arrow/compute/exec/test_util.cc index 1e09cb742fa..330ee471126 100644 --- a/cpp/src/arrow/compute/exec/test_util.cc +++ b/cpp/src/arrow/compute/exec/test_util.cc @@ -143,16 +143,25 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& descrs, +ExecBatch ExecBatchFromJSON(const std::vector& types, util::string_view json) { auto fields = ::arrow::internal::MapVector( - [](const ValueDescr& descr) { return field("", descr.type); }, descrs); + [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)}; + return batch; +} + +ExecBatch ExecBatchFromJSON(const std::vector& types, + const std::vector& shapes, util::string_view json) { + DCHECK_EQ(types.size(), shapes.size()); + + ExecBatch batch = ExecBatchFromJSON(types, json); + auto value_it = batch.values.begin(); - for (const auto& descr : descrs) { - if (descr.shape == 
ValueDescr::SCALAR) { + for (ArgShape shape : shapes) { + if (shape == ArgShape::SCALAR) { if (batch.length == 0) { *value_it = MakeNullScalar(value_it->type()); } else { @@ -232,13 +241,13 @@ BatchesWithSchema MakeBatchesFromString( const std::vector& json_strings, int multiplicity) { BatchesWithSchema out_batches{{}, schema}; - std::vector descrs; + std::vector types; for (auto&& field : schema->fields()) { - descrs.emplace_back(field->type()); + types.emplace_back(field->type()); } for (auto&& s : json_strings) { - out_batches.batches.push_back(ExecBatchFromJSON(descrs, s)); + out_batches.batches.push_back(ExecBatchFromJSON(types, s)); } size_t batch_count = out_batches.batches.size(); diff --git a/cpp/src/arrow/compute/exec/test_util.h b/cpp/src/arrow/compute/exec/test_util.h index ba7e4bb3411..64f725deafd 100644 --- a/cpp/src/arrow/compute/exec/test_util.h +++ b/cpp/src/arrow/compute/exec/test_util.h @@ -27,6 +27,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec/exec_plan.h" +#include "arrow/compute/kernel.h" #include "arrow/testing/visibility.h" #include "arrow/util/async_generator.h" #include "arrow/util/pcg_random.h" @@ -44,8 +45,11 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& descrs, - util::string_view json); +ExecBatch ExecBatchFromJSON(const std::vector& types, util::string_view json); + +ARROW_TESTING_EXPORT +ExecBatch ExecBatchFromJSON(const std::vector& types, + const std::vector& shapes, util::string_view json); struct BatchesWithSchema { std::vector batches; diff --git a/cpp/src/arrow/compute/exec_internal.h b/cpp/src/arrow/compute/exec_internal.h index c475a61c1ba..afca289c20e 100644 --- a/cpp/src/arrow/compute/exec_internal.h +++ b/cpp/src/arrow/compute/exec_internal.h @@ -84,8 +84,7 @@ class ARROW_EXPORT ExecSpanIterator { /// \param[in] batch the input ExecBatch /// \param[in] max_chunksize the maximum length of each ExecSpan. Depending /// on the chunk layout of ChunkedArray. - Status Init(const ExecBatch& batch, ValueDescr::Shape output_shape = ValueDescr::ARRAY, - int64_t max_chunksize = kDefaultMaxChunksize); + Status Init(const ExecBatch& batch, int64_t max_chunksize = kDefaultMaxChunksize); /// \brief Compute the next span by updating the state of the /// previous span object. You must keep passing in the previous @@ -101,6 +100,8 @@ class ARROW_EXPORT ExecSpanIterator { int64_t length() const { return length_; } int64_t position() const { return position_; } + bool have_all_scalars() const { return have_all_scalars_; } + private: ExecSpanIterator(const std::vector& args, int64_t length, int64_t max_chunksize); @@ -108,6 +109,7 @@ class ARROW_EXPORT ExecSpanIterator { bool initialized_ = false; bool have_chunked_arrays_ = false; + bool have_all_scalars_ = false; const std::vector* args_; std::vector chunk_indexes_; std::vector value_positions_; @@ -117,8 +119,8 @@ class ARROW_EXPORT ExecSpanIterator { // from the relative position within each chunk (which is in // value_positions_) std::vector value_offsets_; - int64_t position_; - int64_t length_; + int64_t position_ = 0; + int64_t length_ = 0; int64_t max_chunksize_; }; @@ -147,11 +149,6 @@ class DatumAccumulator : public ExecListener { std::vector values_; }; -/// \brief Check that each Datum is of a "value" type, which means either -/// SCALAR, ARRAY, or CHUNKED_ARRAY. 
If there are chunked inputs, then these -/// inputs will be split into non-chunked ExecBatch values for execution -Status CheckAllValues(const std::vector& values); - class ARROW_EXPORT KernelExecutor { public: virtual ~KernelExecutor() = default; diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index bd344fb2297..573f4aee4a0 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -728,10 +728,10 @@ TEST_F(TestExecBatchIterator, Basics) { ASSERT_EQ(3, batch.num_values()); ASSERT_EQ(length, batch.length); - std::vector descrs = batch.GetDescriptors(); - ASSERT_EQ(ValueDescr::Array(int32()), descrs[0]); - ASSERT_EQ(ValueDescr::Array(float64()), descrs[1]); - ASSERT_EQ(ValueDescr::Scalar(int32()), descrs[2]); + std::vector types = batch.GetTypes(); + ASSERT_EQ(types[0], int32()); + ASSERT_EQ(types[1], float64()); + ASSERT_EQ(types[2], int32()); AssertArraysEqual(*args[0].make_array(), *batch[0].make_array()); AssertArraysEqual(*args[1].make_array(), *batch[1].make_array()); @@ -795,13 +795,12 @@ TEST_F(TestExecBatchIterator, ZeroLengthInputs) { class TestExecSpanIterator : public TestComputeInternals { public: void SetupIterator(const ExecBatch& batch, - ValueDescr::Shape output_shape = ValueDescr::ARRAY, int64_t max_chunksize = kDefaultMaxChunksize) { - ASSERT_OK(iterator_.Init(batch, output_shape, max_chunksize)); + ASSERT_OK(iterator_.Init(batch, max_chunksize)); } void CheckIteration(const ExecBatch& input, int chunksize, const std::vector& ex_batch_sizes) { - SetupIterator(input, ValueDescr::ARRAY, chunksize); + SetupIterator(input, chunksize); ExecSpan batch; int64_t position = 0; for (size_t i = 0; i < ex_batch_sizes.size(); ++i) { @@ -902,8 +901,10 @@ TEST_F(TestExecSpanIterator, ZeroLengthInputs) { auto CheckArgs = [&](const ExecBatch& batch) { ExecSpanIterator iterator; - ASSERT_OK(iterator.Init(batch, ValueDescr::ARRAY)); + ASSERT_OK(iterator.Init(batch)); ExecSpan iter_span; + ASSERT_TRUE(iterator.Next(&iter_span)); + ASSERT_EQ(0, iter_span.length); ASSERT_FALSE(iterator.Next(&iter_span)); }; @@ -1045,11 +1046,13 @@ Status ExecStateful(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) return Status::OK(); } -// TODO: remove this / refactor it in ARROW-16577 Status ExecAddInt32(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - const Int32Scalar& arg0 = batch[0].scalar_as(); - const Int32Scalar& arg1 = batch[1].scalar_as(); - out->value = std::make_shared(arg0.value + arg1.value); + const int32_t* left_data = batch[0].array.GetValues(1); + const int32_t* right_data = batch[1].array.GetValues(1); + int32_t* out_data = out->array_span()->GetValues(1); + for (int64_t i = 0; i < batch.length; ++i) { + *out_data++ = *left_data++ + *right_data++; + } return Status::OK(); } @@ -1078,16 +1081,15 @@ class TestCallScalarFunction : public TestComputeInternals { /*doc=*/FunctionDoc::Empty()); // Add a few kernels. 
Our implementation only accepts arrays - ASSERT_OK(func->AddKernel({InputType::Array(uint8())}, uint8(), ExecCopyArraySpan)); - ASSERT_OK(func->AddKernel({InputType::Array(int32())}, int32(), ExecCopyArraySpan)); - ASSERT_OK( - func->AddKernel({InputType::Array(float64())}, float64(), ExecCopyArraySpan)); + ASSERT_OK(func->AddKernel({uint8()}, uint8(), ExecCopyArraySpan)); + ASSERT_OK(func->AddKernel({int32()}, int32(), ExecCopyArraySpan)); + ASSERT_OK(func->AddKernel({float64()}, float64(), ExecCopyArraySpan)); ASSERT_OK(registry->AddFunction(func)); // A version which doesn't want the executor to call PropagateNulls auto func2 = std::make_shared( "test_copy_computed_bitmap", Arity::Unary(), /*doc=*/FunctionDoc::Empty()); - ScalarKernel kernel({InputType::Array(uint8())}, uint8(), ExecComputedBitmap); + ScalarKernel kernel({uint8()}, uint8(), ExecComputedBitmap); kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE; ASSERT_OK(func2->AddKernel(kernel)); ASSERT_OK(registry->AddFunction(func2)); @@ -1103,7 +1105,7 @@ class TestCallScalarFunction : public TestComputeInternals { auto f2 = std::make_shared( "test_nopre_validity_or_data", Arity::Unary(), /*doc=*/FunctionDoc::Empty()); - ScalarKernel kernel({InputType::Array(uint8())}, uint8(), ExecNoPreallocatedData); + ScalarKernel kernel({uint8()}, uint8(), ExecNoPreallocatedData); kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; ASSERT_OK(f1->AddKernel(kernel)); @@ -1123,7 +1125,7 @@ class TestCallScalarFunction : public TestComputeInternals { auto func = std::make_shared("test_stateful", Arity::Unary(), /*doc=*/FunctionDoc::Empty()); - ScalarKernel kernel({InputType::Array(int32())}, int32(), ExecStateful, InitStateful); + ScalarKernel kernel({int32()}, int32(), ExecStateful, InitStateful); ASSERT_OK(func->AddKernel(kernel)); ASSERT_OK(registry->AddFunction(func)); } @@ -1133,8 +1135,7 @@ class TestCallScalarFunction : public TestComputeInternals { auto func = std::make_shared("test_scalar_add_int32", Arity::Binary(), /*doc=*/FunctionDoc::Empty()); - ASSERT_OK(func->AddKernel({InputType::Scalar(int32()), InputType::Scalar(int32())}, - int32(), ExecAddInt32)); + ASSERT_OK(func->AddKernel({int32(), int32()}, int32(), ExecAddInt32)); ASSERT_OK(registry->AddFunction(func)); } }; @@ -1154,8 +1155,9 @@ TEST_F(TestCallScalarFunction, ArgumentValidation) { ASSERT_RAISES(Invalid, CallFunction("test_copy", args)); // Cannot do scalar - args = {Datum(std::make_shared(5))}; - ASSERT_RAISES(NotImplemented, CallFunction("test_copy", args)); + Datum d1_scalar(std::make_shared(5)); + ASSERT_OK_AND_ASSIGN(auto result, CallFunction("test_copy", {d1})); + ASSERT_OK_AND_ASSIGN(result, CallFunction("test_copy", {d1_scalar})); } TEST_F(TestCallScalarFunction, PreallocationCases) { diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index b5ebc67d180..dd67de023e8 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -79,51 +79,35 @@ static const FunctionDoc kEmptyFunctionDoc{}; const FunctionDoc& FunctionDoc::Empty() { return kEmptyFunctionDoc; } -static Status CheckArityImpl(const Function& function, int passed_num_args, - const char* passed_num_args_label) { - if (function.arity().is_varargs && passed_num_args < function.arity().num_args) { - return Status::Invalid("VarArgs function '", function.name(), "' needs at least ", - function.arity().num_args, " arguments but ", - passed_num_args_label, " only ", passed_num_args); +static Status CheckArityImpl(const Function& func, int 
num_args) { + if (func.arity().is_varargs && num_args < func.arity().num_args) { + return Status::Invalid("VarArgs function '", func.name(), "' needs at least ", + func.arity().num_args, " arguments but only ", num_args, + " passed"); } - if (!function.arity().is_varargs && passed_num_args != function.arity().num_args) { - return Status::Invalid("Function '", function.name(), "' accepts ", - function.arity().num_args, " arguments but ", - passed_num_args_label, " ", passed_num_args); + if (!func.arity().is_varargs && num_args != func.arity().num_args) { + return Status::Invalid("Function '", func.name(), "' accepts ", func.arity().num_args, + " arguments but ", num_args, " passed"); } - return Status::OK(); } -Status Function::CheckArity(const std::vector& in_types) const { - return CheckArityImpl(*this, static_cast(in_types.size()), "kernel accepts"); -} - -Status Function::CheckArity(const std::vector& descrs) const { - return CheckArityImpl(*this, static_cast(descrs.size()), - "attempted to look up kernel(s) with"); -} - -static Status CheckOptions(const Function& function, const FunctionOptions* options) { - if (options == nullptr && function.doc().options_required) { - return Status::Invalid("Function '", function.name(), - "' cannot be called without options"); - } - return Status::OK(); +Status Function::CheckArity(size_t num_args) const { + return CheckArityImpl(*this, static_cast(num_args)); } namespace detail { -Status NoMatchingKernel(const Function* func, const std::vector& descrs) { +Status NoMatchingKernel(const Function* func, const std::vector& types) { return Status::NotImplemented("Function '", func->name(), "' has no kernel matching input types ", - ValueDescr::ToString(descrs)); + TypeHolder::ToString(types)); } template const KernelType* DispatchExactImpl(const std::vector& kernels, - const std::vector& values) { + const std::vector& values) { const KernelType* kernel_matches[SimdLevel::MAX] = {nullptr}; // Validate arity @@ -159,7 +143,7 @@ const KernelType* DispatchExactImpl(const std::vector& kernels, } const Kernel* DispatchExactImpl(const Function* func, - const std::vector& values) { + const std::vector& values) { if (func->kind() == Function::SCALAR) { return DispatchExactImpl(checked_cast(func)->kernels(), values); @@ -186,11 +170,11 @@ const Kernel* DispatchExactImpl(const Function* func, } // namespace detail Result Function::DispatchExact( - const std::vector& values) const { + const std::vector& values) const { if (kind_ == Function::META) { return Status::NotImplemented("Dispatch for a MetaFunction's Kernels"); } - RETURN_NOT_OK(CheckArity(values)); + RETURN_NOT_OK(CheckArity(values.size())); if (auto kernel = detail::DispatchExactImpl(this, values)) { return kernel; @@ -198,32 +182,44 @@ Result Function::DispatchExact( return detail::NoMatchingKernel(this, values); } -Result Function::DispatchBest(std::vector* values) const { +Result Function::DispatchBest(std::vector* values) const { // TODO(ARROW-11508) permit generic conversions here return DispatchExact(*values); } -Result Function::Execute(const std::vector& args, - const FunctionOptions* options, ExecContext* ctx) const { - return ExecuteInternal(args, /*passed_length=*/-1, options, ctx); +namespace { + +/// \brief Check that each Datum is of a "value" type, which means either +/// SCALAR, ARRAY, or CHUNKED_ARRAY. 
+Status CheckAllValues(const std::vector& values) { + for (const auto& value : values) { + if (!value.is_value()) { + return Status::Invalid("Tried executing function with non-value type: ", + value.ToString()); + } + } + return Status::OK(); } -Result Function::Execute(const ExecBatch& batch, const FunctionOptions* options, - ExecContext* ctx) const { - return ExecuteInternal(batch.values, batch.length, options, ctx); +Status CheckOptions(const Function& function, const FunctionOptions* options) { + if (options == nullptr && function.doc().options_required) { + return Status::Invalid("Function '", function.name(), + "' cannot be called without options"); + } + return Status::OK(); } -Result Function::ExecuteInternal(const std::vector& args, - int64_t passed_length, - const FunctionOptions* options, - ExecContext* ctx) const { +Result ExecuteInternal(const Function& func, std::vector args, + int64_t passed_length, const FunctionOptions* options, + ExecContext* ctx) { + std::unique_ptr default_ctx; if (options == nullptr) { - RETURN_NOT_OK(CheckOptions(*this, options)); - options = default_options(); + RETURN_NOT_OK(CheckOptions(func, options)); + options = func.default_options(); } if (ctx == nullptr) { - ExecContext default_ctx; - return ExecuteInternal(args, passed_length, options, &default_ctx); + default_ctx.reset(new ExecContext()); + ctx = default_ctx.get(); } util::tracing::Span span; @@ -235,38 +231,45 @@ Result Function::ExecuteInternal(const std::vector& args, // type-check Datum arguments here. Really we'd like to avoid this as much as // possible - RETURN_NOT_OK(detail::CheckAllValues(args)); - std::vector inputs(args.size()); + RETURN_NOT_OK(CheckAllValues(args)); + std::vector in_types(args.size()); for (size_t i = 0; i != args.size(); ++i) { - inputs[i] = args[i].descr(); + in_types[i] = args[i].type().get(); } std::unique_ptr executor; - if (kind() == Function::SCALAR) { + if (func.kind() == Function::SCALAR) { executor = detail::KernelExecutor::MakeScalar(); - } else if (kind() == Function::VECTOR) { + } else if (func.kind() == Function::VECTOR) { executor = detail::KernelExecutor::MakeVector(); - } else if (kind() == Function::SCALAR_AGGREGATE) { + } else if (func.kind() == Function::SCALAR_AGGREGATE) { executor = detail::KernelExecutor::MakeScalarAggregate(); } else { return Status::NotImplemented("Direct execution of HASH_AGGREGATE functions"); } - ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, DispatchBest(&inputs)); - ARROW_ASSIGN_OR_RAISE(std::vector args_with_casts, Cast(args, inputs, ctx)); + ARROW_ASSIGN_OR_RAISE(const Kernel* kernel, func.DispatchBest(&in_types)); + + // Cast arguments if necessary + for (size_t i = 0; i != args.size(); ++i) { + if (in_types[i] != args[i].type()) { + ARROW_ASSIGN_OR_RAISE(args[i], Cast(args[i], CastOptions::Safe(in_types[i]), ctx)); + } + } - std::unique_ptr state; KernelContext kernel_ctx{ctx, kernel}; + + std::unique_ptr state; if (kernel->init) { - ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, inputs, options})); + ARROW_ASSIGN_OR_RAISE(state, kernel->init(&kernel_ctx, {kernel, in_types, options})); kernel_ctx.SetState(state.get()); } - RETURN_NOT_OK(executor->Init(&kernel_ctx, {kernel, inputs, options})); + RETURN_NOT_OK(executor->Init(&kernel_ctx, {kernel, in_types, options})); detail::DatumAccumulator listener; - ExecBatch input(std::move(args_with_casts), /*length=*/0); + ExecBatch input(std::move(args), /*length=*/0); if (input.num_values() == 0) { if (passed_length != -1) { input.length = passed_length; @@ 
-275,9 +278,9 @@ Result Function::ExecuteInternal(const std::vector& args, bool all_same_length = false; int64_t inferred_length = detail::InferBatchLength(input.values, &all_same_length); input.length = inferred_length; - if (kind() == Function::SCALAR) { + if (func.kind() == Function::SCALAR) { DCHECK(passed_length == -1 || passed_length == inferred_length); - } else if (kind() == Function::VECTOR) { + } else if (func.kind() == Function::VECTOR) { auto vkernel = static_cast(kernel); if (!(all_same_length || !vkernel->can_execute_chunkwise)) { return Status::Invalid("Vector kernel arguments must all be the same length"); @@ -287,12 +290,25 @@ Result Function::ExecuteInternal(const std::vector& args, RETURN_NOT_OK(executor->Execute(input, &listener)); const auto out = executor->WrapResults(input.values, listener.values()); #ifndef NDEBUG - DCHECK_OK(executor->CheckResultType(out, name_.c_str())); + DCHECK_OK(executor->CheckResultType(out, func.name().c_str())); #endif return out; } +} // namespace + +Result Function::Execute(const std::vector& args, + const FunctionOptions* options, ExecContext* ctx) const { + return ExecuteInternal(*this, args, /*passed_length=*/-1, options, ctx); +} + +Result Function::Execute(const ExecBatch& batch, const FunctionOptions* options, + ExecContext* ctx) const { + return ExecuteInternal(*this, batch.values, batch.length, options, ctx); +} + namespace { + Status ValidateFunctionSummary(const std::string& s) { if (s.find('\n') != s.npos) { return Status::Invalid("summary contains a newline"); @@ -347,7 +363,7 @@ Status Function::Validate() const { Status ScalarFunction::AddKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init) { - RETURN_NOT_OK(CheckArity(in_types)); + RETURN_NOT_OK(CheckArity(in_types.size())); if (arity_.is_varargs && in_types.size() != 1) { return Status::Invalid("VarArgs signatures must have exactly one input type"); @@ -359,7 +375,7 @@ Status ScalarFunction::AddKernel(std::vector in_types, OutputType out } Status ScalarFunction::AddKernel(ScalarKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -369,7 +385,7 @@ Status ScalarFunction::AddKernel(ScalarKernel kernel) { Status VectorFunction::AddKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init) { - RETURN_NOT_OK(CheckArity(in_types)); + RETURN_NOT_OK(CheckArity(in_types.size())); if (arity_.is_varargs && in_types.size() != 1) { return Status::Invalid("VarArgs signatures must have exactly one input type"); @@ -381,7 +397,7 @@ Status VectorFunction::AddKernel(std::vector in_types, OutputType out } Status VectorFunction::AddKernel(VectorKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -390,7 +406,7 @@ Status VectorFunction::AddKernel(VectorKernel kernel) { } Status ScalarAggregateFunction::AddKernel(ScalarAggregateKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts 
varargs but kernel signature does not"); } @@ -399,7 +415,7 @@ Status ScalarAggregateFunction::AddKernel(ScalarAggregateKernel kernel) { } Status HashAggregateFunction::AddKernel(HashAggregateKernel kernel) { - RETURN_NOT_OK(CheckArity(kernel.signature->in_types())); + RETURN_NOT_OK(CheckArity(kernel.signature->in_types().size())); if (arity_.is_varargs && !kernel.signature->is_varargs()) { return Status::Invalid("Function accepts varargs but kernel signature does not"); } @@ -410,8 +426,7 @@ Status HashAggregateFunction::AddKernel(HashAggregateKernel kernel) { Result MetaFunction::Execute(const std::vector& args, const FunctionOptions* options, ExecContext* ctx) const { - RETURN_NOT_OK( - CheckArityImpl(*this, static_cast(args.size()), "attempted to Execute with")); + RETURN_NOT_OK(CheckArityImpl(*this, static_cast(args.size()))); RETURN_NOT_OK(CheckOptions(*this, options)); if (options == nullptr) { diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index c32c8766a91..7f2fba68caf 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -211,19 +211,19 @@ class ARROW_EXPORT Function { virtual int num_kernels() const = 0; /// \brief Return a kernel that can execute the function given the exact - /// argument types (without implicit type casts or scalar->array promotions). + /// argument types (without implicit type casts). /// /// NB: This function is overridden in CastFunction. - virtual Result DispatchExact( - const std::vector& values) const; + virtual Result DispatchExact(const std::vector& types) const; /// \brief Return a best-match kernel that can execute the function given the argument /// types, after implicit casts are applied. /// - /// \param[in,out] values Argument types. An element may be modified to indicate that - /// the returned kernel only approximately matches the input value descriptors; callers - /// are responsible for casting inputs to the type and shape required by the kernel. - virtual Result DispatchBest(std::vector* values) const; + /// \param[in,out] values Argument types. An element may be modified to + /// indicate that the returned kernel only approximately matches the input + /// value descriptors; callers are responsible for casting inputs to the type + /// required by the kernel. + virtual Result DispatchBest(std::vector* values) const; /// \brief Execute the function eagerly with the passed input arguments with /// kernel dispatch, batch iteration, and memory allocation details taken @@ -255,11 +255,7 @@ class ARROW_EXPORT Function { doc_(std::move(doc)), default_options_(default_options) {} - Result ExecuteInternal(const std::vector& args, int64_t passed_length, - const FunctionOptions* options, ExecContext* ctx) const; - - Status CheckArity(const std::vector&) const; - Status CheckArity(const std::vector&) const; + Status CheckArity(size_t num_args) const; std::string name_; Function::Kind kind_; @@ -294,11 +290,11 @@ class FunctionImpl : public Function { /// \brief Look up a kernel in a function. If no Kernel is found, nullptr is returned. ARROW_EXPORT -const Kernel* DispatchExactImpl(const Function* func, const std::vector&); +const Kernel* DispatchExactImpl(const Function* func, const std::vector&); /// \brief Return an error message if no Kernel is found. 
ARROW_EXPORT -Status NoMatchingKernel(const Function* func, const std::vector&); +Status NoMatchingKernel(const Function* func, const std::vector&); } // namespace detail diff --git a/cpp/src/arrow/compute/function_benchmark.cc b/cpp/src/arrow/compute/function_benchmark.cc index b508ad047fb..bdd0bb6e986 100644 --- a/cpp/src/arrow/compute/function_benchmark.cc +++ b/cpp/src/arrow/compute/function_benchmark.cc @@ -19,6 +19,7 @@ #include "arrow/array/array_base.h" #include "arrow/compute/api.h" +#include "arrow/compute/cast_internal.h" #include "arrow/compute/exec_internal.h" #include "arrow/memory_pool.h" #include "arrow/scalar.h" @@ -67,14 +68,13 @@ void BM_CastDispatchBaseline(benchmark::State& state) { // Repeatedly invoke a trivial Cast with all dispatch outside the hot loop random::RandomArrayGenerator rag(kSeed); - auto int_scalars = ToScalars(rag.Int64(kScalarCount, 0, 1 << 20)); - + auto int_array = rag.Int64(1, 0, 1 << 20); auto double_type = float64(); CastOptions cast_options; cast_options.to_type = double_type; - ASSERT_OK_AND_ASSIGN(auto cast_function, GetCastFunction(double_type)); + ASSERT_OK_AND_ASSIGN(auto cast_function, internal::GetCastFunction(double_type)); ASSERT_OK_AND_ASSIGN(auto cast_kernel, - cast_function->DispatchExact({int_scalars[0]->type})); + cast_function->DispatchExact({int_array->type()})); const auto& exec = static_cast(cast_kernel)->exec; ExecContext exec_context; @@ -85,15 +85,13 @@ void BM_CastDispatchBaseline(benchmark::State& state) { .ValueOrDie(); kernel_context.SetState(cast_state.get()); - ExecSpan input; - input.length = 1; + ExecSpan input({ExecValue(*int_array->data())}, 1); + ExecResult result; + ASSERT_OK_AND_ASSIGN(std::shared_ptr result_space, + MakeArrayOfNull(double_type, 1)); + result.array_span()->SetMembers(*result_space->data()); for (auto _ : state) { - ExecResult result; - result.value = MakeNullScalar(double_type); - for (const std::shared_ptr& int_scalar : int_scalars) { - input.values = {ExecValue(int_scalar.get())}; - ABORT_NOT_OK(exec(&kernel_context, input, &result)); - } + ABORT_NOT_OK(exec(&kernel_context, input, &result)); } state.SetItemsProcessed(state.iterations() * kScalarCount); @@ -153,31 +151,26 @@ void BM_ExecuteScalarFunctionOnScalar(benchmark::State& state) { void BM_ExecuteScalarKernelOnScalar(benchmark::State& state) { // Execute a trivial function, with argument dispatch outside the hot path - const int64_t N = 10000; - auto function = *GetFunctionRegistry()->GetFunction("is_valid"); - auto kernel = *function->DispatchExact({ValueDescr::Scalar(int64())}); + auto kernel = *function->DispatchExact({int64()}); const auto& exec = static_cast(*kernel).exec; - const auto scalars = MakeScalarsForIsValid(N); - ExecContext exec_context; KernelContext kernel_context(&exec_context); - ExecSpan input; - input.length = 1; + ASSERT_OK_AND_ASSIGN(std::shared_ptr input_arr, MakeArrayOfNull(int64(), 1)); + ExecSpan input({*input_arr->data()}, 1); + + ExecResult output; + ASSERT_OK_AND_ASSIGN(std::shared_ptr output_arr, MakeArrayOfNull(int64(), 1)); + output.array_span()->SetMembers(*output_arr->data()); + + const int64_t N = 10000; for (auto _ : state) { - int64_t total = 0; - for (const std::shared_ptr& scalar : scalars) { - ExecResult result; - result.value = MakeNullScalar(int64()); - input.values = {scalar.get()}; - ABORT_NOT_OK(exec(&kernel_context, input, &result)); - total += result.scalar()->is_valid; + for (int i = 0; i < N; ++i) { + ABORT_NOT_OK(exec(&kernel_context, input, &output)); } - 
benchmark::DoNotOptimize(total); } - state.SetItemsProcessed(state.iterations() * N); } diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index f2303b87d90..17261332619 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -345,6 +345,10 @@ static inline Result> GenericToScalar( return MakeNullScalar(value); } +static inline Result> GenericToScalar(const TypeHolder& value) { + return GenericToScalar(value.GetSharedPtr()); +} + static inline Result> GenericToScalar( const std::shared_ptr& value) { return value; @@ -430,6 +434,12 @@ static inline enable_if_same_result> GenericFromSca return value->type; } +template +static inline enable_if_same_result GenericFromScalar( + const std::shared_ptr& value) { + return value->type; +} + template static inline enable_if_same_result> GenericFromScalar( const std::shared_ptr& value) { diff --git a/cpp/src/arrow/compute/function_test.cc b/cpp/src/arrow/compute/function_test.cc index f06f225f5b9..94daa6baa96 100644 --- a/cpp/src/arrow/compute/function_test.cc +++ b/cpp/src/arrow/compute/function_test.cc @@ -230,9 +230,9 @@ void CheckAddDispatch(FunctionType* func, ExecType exec) { // Duplicate sig is okay ASSERT_OK(func->AddKernel(in_types1, out_type1, exec)); - // Add given a descr - KernelType descr({float64(), float64()}, float64(), exec); - ASSERT_OK(func->AddKernel(descr)); + // Add a kernel + KernelType kernel({float64(), float64()}, float64(), exec); + ASSERT_OK(func->AddKernel(kernel)); ASSERT_EQ(4, func->num_kernels()); ASSERT_EQ(4, func->kernels().size()); @@ -249,9 +249,9 @@ void CheckAddDispatch(FunctionType* func, ExecType exec) { KernelType invalid_kernel({boolean()}, boolean(), exec); ASSERT_RAISES(Invalid, func->AddKernel(invalid_kernel)); - ASSERT_OK_AND_ASSIGN(const Kernel* kernel, func->DispatchExact({int32(), int32()})); + ASSERT_OK_AND_ASSIGN(const Kernel* dispatched, func->DispatchExact({int32(), int32()})); KernelSignature expected_sig(in_types1, out_type1); - ASSERT_TRUE(kernel->signature->Equals(expected_sig)); + ASSERT_TRUE(dispatched->signature->Equals(expected_sig)); // No kernel available ASSERT_RAISES(NotImplemented, func->DispatchExact({utf8(), utf8()})); @@ -288,7 +288,7 @@ TEST(ArrayFunction, VarArgs) { ScalarKernel non_va_kernel(std::make_shared(va_args, int8()), ExecNYI); ASSERT_RAISES(Invalid, va_func.AddKernel(non_va_kernel)); - std::vector args = {ValueDescr::Scalar(int8()), int8(), int8()}; + std::vector args = {int8(), int8(), int8()}; ASSERT_OK_AND_ASSIGN(const Kernel* kernel, va_func.DispatchExact(args)); ASSERT_TRUE(kernel->signature->MatchesInputs(args)); @@ -319,7 +319,7 @@ Status NoopFinalize(KernelContext*, Datum*) { return Status::OK(); } TEST(ScalarAggregateFunction, DispatchExact) { ScalarAggregateFunction func("agg_test", Arity::Unary(), FunctionDoc::Empty()); - std::vector in_args = {ValueDescr::Array(int8())}; + std::vector in_args = {int8()}; ScalarAggregateKernel kernel(std::move(in_args), int64(), NoopInit, NoopConsume, NoopMerge, NoopFinalize); ASSERT_OK(func.AddKernel(kernel)); @@ -341,18 +341,14 @@ TEST(ScalarAggregateFunction, DispatchExact) { kernel.signature = std::make_shared(in_args, float64()); ASSERT_RAISES(Invalid, func.AddKernel(kernel)); - std::vector dispatch_args = {ValueDescr::Array(int8())}; + std::vector dispatch_args = {int8()}; ASSERT_OK_AND_ASSIGN(const Kernel* selected_kernel, func.DispatchExact(dispatch_args)); ASSERT_EQ(func.kernels()[0], selected_kernel); 
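Since dispatch now traffics in TypeHolder rather than ValueDescr, a brief usage sketch may help; it is illustrative only, and the "add" lookup is an assumption about the default registry rather than part of this patch:

// TypeHolder converts implicitly from std::shared_ptr<DataType>, so argument
// types can be collected and handed straight to DispatchExact/DispatchBest.
std::vector<TypeHolder> arg_types = {int32(), int32()};
ASSERT_OK_AND_ASSIGN(std::shared_ptr<Function> add_fn,
                     GetFunctionRegistry()->GetFunction("add"));
ASSERT_OK_AND_ASSIGN(const Kernel* add_kernel, add_fn->DispatchExact(arg_types));
ASSERT_TRUE(add_kernel->signature->MatchesInputs(arg_types));
// When shared ownership is needed again (e.g. in a kernel init),
// TypeHolder::GetSharedPtr() recovers the std::shared_ptr<DataType>.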
ASSERT_TRUE(selected_kernel->signature->MatchesInputs(dispatch_args)); - // We declared that only arrays are accepted - dispatch_args[0] = {ValueDescr::Scalar(int8())}; - ASSERT_RAISES(NotImplemented, func.DispatchExact(dispatch_args)); - // Didn't qualify the float64() kernel so this actually dispatches (even // though that may not be what you want) - dispatch_args[0] = {ValueDescr::Scalar(float64())}; + dispatch_args[0] = {float64()}; ASSERT_OK_AND_ASSIGN(selected_kernel, func.DispatchExact(dispatch_args)); ASSERT_TRUE(selected_kernel->signature->MatchesInputs(dispatch_args)); } diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index 909c2399c8e..1e3303473ef 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -87,7 +87,9 @@ class SameTypeIdMatcher : public TypeMatcher { public: explicit SameTypeIdMatcher(Type::type accepted_id) : accepted_id_(accepted_id) {} - bool Matches(const DataType& type) const override { return type.id() == accepted_id_; } + bool Matches(const TypeHolder& type) const override { + return type.id() == accepted_id_; + } std::string ToString() const override { std::stringstream ss; @@ -122,11 +124,11 @@ class TimeUnitMatcher : public TypeMatcher { explicit TimeUnitMatcher(TimeUnit::type accepted_unit) : accepted_unit_(accepted_unit) {} - bool Matches(const DataType& type) const override { + bool Matches(const TypeHolder& type) const override { if (type.id() != ArrowType::type_id) { return false; } - const auto& time_type = checked_cast(type); + const auto& time_type = checked_cast(*type.type); return time_type.unit() == accepted_unit_; } @@ -177,7 +179,7 @@ class IntegerMatcher : public TypeMatcher { public: IntegerMatcher() {} - bool Matches(const DataType& type) const override { return is_integer(type.id()); } + bool Matches(const TypeHolder& type) const override { return is_integer(type.id()); } bool Equals(const TypeMatcher& other) const override { if (this == &other) { @@ -196,7 +198,7 @@ class PrimitiveMatcher : public TypeMatcher { public: PrimitiveMatcher() {} - bool Matches(const DataType& type) const override { return is_primitive(type.id()); } + bool Matches(const TypeHolder& type) const override { return is_primitive(type.id()); } bool Equals(const TypeMatcher& other) const override { if (this == &other) { @@ -215,7 +217,9 @@ class BinaryLikeMatcher : public TypeMatcher { public: BinaryLikeMatcher() {} - bool Matches(const DataType& type) const override { return is_binary_like(type.id()); } + bool Matches(const TypeHolder& type) const override { + return is_binary_like(type.id()); + } bool Equals(const TypeMatcher& other) const override { if (this == &other) { @@ -235,7 +239,7 @@ class LargeBinaryLikeMatcher : public TypeMatcher { public: LargeBinaryLikeMatcher() {} - bool Matches(const DataType& type) const override { + bool Matches(const TypeHolder& type) const override { return is_large_binary_like(type.id()); } @@ -253,7 +257,7 @@ class FixedSizeBinaryLikeMatcher : public TypeMatcher { public: FixedSizeBinaryLikeMatcher() {} - bool Matches(const DataType& type) const override { + bool Matches(const TypeHolder& type) const override { return is_fixed_size_binary(type.id()); } @@ -282,7 +286,6 @@ std::shared_ptr FixedSizeBinaryLike() { size_t InputType::Hash() const { size_t result = kHashSeed; - hash_combine(result, static_cast(shape_)); hash_combine(result, static_cast(kind_)); switch (kind_) { case InputType::EXACT_TYPE: @@ -296,21 +299,6 @@ size_t InputType::Hash() const { std::string 
InputType::ToString() const { std::stringstream ss; - switch (shape_) { - case ValueDescr::ANY: - ss << "any"; - break; - case ValueDescr::ARRAY: - ss << "array"; - break; - case ValueDescr::SCALAR: - ss << "scalar"; - break; - default: - DCHECK(false); - break; - } - ss << "["; switch (kind_) { case InputType::ANY_TYPE: ss << "any"; @@ -325,7 +313,6 @@ std::string InputType::ToString() const { DCHECK(false); break; } - ss << "]"; return ss.str(); } @@ -333,7 +320,7 @@ bool InputType::Equals(const InputType& other) const { if (this == &other) { return true; } - if (kind_ != other.kind_ || shape_ != other.shape_) { + if (kind_ != other.kind_) { return false; } switch (kind_) { @@ -348,22 +335,30 @@ bool InputType::Equals(const InputType& other) const { } } -bool InputType::Matches(const ValueDescr& descr) const { - if (shape_ != ValueDescr::ANY && descr.shape != shape_) { - return false; - } +bool InputType::Matches(const TypeHolder& type) const { switch (kind_) { case InputType::EXACT_TYPE: - return type_->Equals(*descr.type); + return type_->Equals(*type.type); case InputType::USE_TYPE_MATCHER: - return type_matcher_->Matches(*descr.type); + return type_matcher_->Matches(type); default: // ANY_TYPE return true; } } -bool InputType::Matches(const Datum& value) const { return Matches(value.descr()); } +bool InputType::Matches(const Datum& value) const { + switch (value.kind()) { + case Datum::ARRAY: + case Datum::CHUNKED_ARRAY: + case Datum::SCALAR: + break; + default: + DCHECK(false); + return false; + } + return Matches(value.type().get()); +} const std::shared_ptr& InputType::type() const { DCHECK_EQ(InputType::EXACT_TYPE, kind_); @@ -378,21 +373,12 @@ const TypeMatcher& InputType::type_matcher() const { // ---------------------------------------------------------------------- // OutputType -OutputType::OutputType(ValueDescr descr) : OutputType(descr.type) { - shape_ = descr.shape; -} - -Result OutputType::Resolve(KernelContext* ctx, - const std::vector& args) const { - ValueDescr::Shape broadcasted_shape = GetBroadcastShape(args); +Result OutputType::Resolve(KernelContext* ctx, + const std::vector& types) const { if (kind_ == OutputType::FIXED) { - return ValueDescr(type_, shape_ == ValueDescr::ANY ? 
broadcasted_shape : shape_); + return type_.get(); } else { - ARROW_ASSIGN_OR_RAISE(ValueDescr resolved_descr, resolver_(ctx, args)); - if (resolved_descr.shape == ValueDescr::ANY) { - resolved_descr.shape = broadcasted_shape; - } - return resolved_descr; + return resolver_(ctx, types); } } @@ -448,19 +434,19 @@ bool KernelSignature::Equals(const KernelSignature& other) const { return true; } -bool KernelSignature::MatchesInputs(const std::vector& args) const { +bool KernelSignature::MatchesInputs(const std::vector& types) const { if (is_varargs_) { - for (size_t i = 0; i < args.size(); ++i) { - if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(args[i])) { + for (size_t i = 0; i < types.size(); ++i) { + if (!in_types_[std::min(i, in_types_.size() - 1)].Matches(types[i])) { return false; } } } else { - if (args.size() != in_types_.size()) { + if (types.size() != in_types_.size()) { return false; } for (size_t i = 0; i < in_types_.size(); ++i) { - if (!in_types_[i].Matches(args[i])) { + if (!in_types_[i].Matches(types[i])) { return false; } } @@ -495,7 +481,7 @@ std::string KernelSignature::ToString() const { ss << in_types_[i].ToString(); } if (is_varargs_) { - ss << "]"; + ss << "*]"; } else { ss << ")"; } diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index 93a1c605a99..1b412af525e 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -101,7 +101,7 @@ struct ARROW_EXPORT TypeMatcher { virtual ~TypeMatcher() = default; /// \brief Return true if this matcher accepts the data type. - virtual bool Matches(const DataType& type) const = 0; + virtual bool Matches(const TypeHolder& type) const = 0; /// \brief A human-interpretable string representation of what the type /// matcher checks for, usable when printing KernelSignature or formatting @@ -143,10 +143,14 @@ ARROW_EXPORT std::shared_ptr Primitive(); } // namespace match -/// \brief An object used for type- and shape-checking arguments to be passed -/// to a kernel and stored in a KernelSignature. Distinguishes between ARRAY -/// and SCALAR arguments using ValueDescr::Shape. The type-checking rule can be -/// supplied either with an exact DataType instance or a custom TypeMatcher. +/// \brief Shape qualifier for value types. In certain instances +/// (e.g. "map_lookup" kernel), an argument may only be a scalar, where in +/// other kernels arguments can be arrays or scalars +enum class ArgShape { ANY, ARRAY, SCALAR }; + +/// \brief An object used for type-checking arguments to be passed to a kernel +/// and stored in a KernelSignature. The type-checking rule can be supplied +/// either with an exact DataType instance or a custom TypeMatcher. class ARROW_EXPORT InputType { public: /// \brief The kind of type-checking rule that the InputType contains. @@ -163,29 +167,21 @@ class ARROW_EXPORT InputType { USE_TYPE_MATCHER }; - /// \brief Accept any value type but with a specific shape (e.g. any Array or - /// any Scalar). - InputType(ValueDescr::Shape shape = ValueDescr::ANY) // NOLINT implicit construction - : kind_(ANY_TYPE), shape_(shape) {} + /// \brief Accept any value type + InputType() : kind_(ANY_TYPE) {} /// \brief Accept an exact value type. - InputType(std::shared_ptr type, // NOLINT implicit construction - ValueDescr::Shape shape = ValueDescr::ANY) - : kind_(EXACT_TYPE), shape_(shape), type_(std::move(type)) {} - - /// \brief Accept an exact value type and shape provided by a ValueDescr. 
- InputType(const ValueDescr& descr) // NOLINT implicit construction - : InputType(descr.type, descr.shape) {} + InputType(std::shared_ptr type) // NOLINT implicit construction + : kind_(EXACT_TYPE), type_(std::move(type)) {} /// \brief Use the passed TypeMatcher to type check. - InputType(std::shared_ptr type_matcher, // NOLINT implicit construction - ValueDescr::Shape shape = ValueDescr::ANY) - : kind_(USE_TYPE_MATCHER), shape_(shape), type_matcher_(std::move(type_matcher)) {} + InputType(std::shared_ptr type_matcher) // NOLINT implicit construction + : kind_(USE_TYPE_MATCHER), type_matcher_(std::move(type_matcher)) {} /// \brief Match any type with the given Type::type. Uses a TypeMatcher for /// its implementation. - explicit InputType(Type::type type_id, ValueDescr::Shape shape = ValueDescr::ANY) - : InputType(match::SameTypeId(type_id), shape) {} + InputType(Type::type type_id) // NOLINT implicit construction + : InputType(match::SameTypeId(type_id)) {} InputType(const InputType& other) { CopyInto(other); } @@ -195,23 +191,8 @@ class ARROW_EXPORT InputType { void operator=(InputType&& other) { MoveInto(std::forward(other)); } - // \brief Match an array with the given exact type. Convenience constructor. - static InputType Array(std::shared_ptr type) { - return InputType(std::move(type), ValueDescr::ARRAY); - } - - // \brief Match a scalar with the given exact type. Convenience constructor. - static InputType Scalar(std::shared_ptr type) { - return InputType(std::move(type), ValueDescr::SCALAR); - } - - // \brief Match an array with the given Type::type id. Convenience - // constructor. - static InputType Array(Type::type id) { return InputType(id, ValueDescr::ARRAY); } - - // \brief Match a scalar with the given Type::type id. Convenience - // constructor. - static InputType Scalar(Type::type id) { return InputType(id, ValueDescr::SCALAR); } + // \brief Match any input (array, scalar of any type) + static InputType Any() { return InputType(); } /// \brief Return true if this input type matches the same type cases as the /// other. @@ -227,21 +208,16 @@ class ARROW_EXPORT InputType { /// \brief Render a human-readable string representation. std::string ToString() const; - /// \brief Return true if the value matches this argument kind in type - /// and shape. + /// \brief Return true if the Datum matches this argument kind in + /// type (and only allows scalar or array-like Datums). bool Matches(const Datum& value) const; - /// \brief Return true if the value descriptor matches this argument kind in - /// type and shape. - bool Matches(const ValueDescr& value) const; + /// \brief Return true if the type matches this InputType + bool Matches(const TypeHolder& type) const; /// \brief The type matching rule that this InputType uses. Kind kind() const { return kind_; } - /// \brief Indicates whether this InputType matches Array (ValueDescr::ARRAY), - /// Scalar (ValueDescr::SCALAR) values, or both (ValueDescr::ANY). - ValueDescr::Shape shape() const { return shape_; } - /// \brief For InputType::EXACT_TYPE kind, the exact type that this InputType /// must match. Otherwise this function should not be used and will assert in /// debug builds. 
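For a concrete picture of the slimmed-down types above: an InputType now checks type only (exact DataType, Type::type id, or TypeMatcher), and a computed OutputType is a plain function returning a TypeHolder. A minimal sketch in the spirit of the FirstType/MinMaxType resolvers used elsewhere in this patch (the EchoFirstType name is illustrative):

// Resolver: the output type is simply the first input type.
Result<TypeHolder> EchoFirstType(KernelContext*, const std::vector<TypeHolder>& types) {
  return types[0];
}

// (timestamp of any unit) -> same timestamp type
auto sig = KernelSignature::Make({InputType(Type::TIMESTAMP)}, OutputType(EchoFirstType));
// sig->MatchesInputs({timestamp(TimeUnit::MILLI)}) is true;
// sig->MatchesInputs({int32()}) is false.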
@@ -255,22 +231,18 @@ class ARROW_EXPORT InputType { private: void CopyInto(const InputType& other) { this->kind_ = other.kind_; - this->shape_ = other.shape_; this->type_ = other.type_; this->type_matcher_ = other.type_matcher_; } void MoveInto(InputType&& other) { this->kind_ = other.kind_; - this->shape_ = other.shape_; this->type_ = std::move(other.type_); this->type_matcher_ = std::move(other.type_matcher_); } Kind kind_; - ValueDescr::Shape shape_ = ValueDescr::ANY; - // For EXACT_TYPE Kind std::shared_ptr type_; @@ -279,43 +251,30 @@ class ARROW_EXPORT InputType { }; /// \brief Container to capture both exact and input-dependent output types. -/// -/// The value shape returned by Resolve will be determined by broadcasting the -/// shapes of the input arguments, otherwise this is handled by the -/// user-defined resolver function: -/// -/// * Any ARRAY shape -> output shape is ARRAY -/// * All SCALAR shapes -> output shape is SCALAR class ARROW_EXPORT OutputType { public: /// \brief An enum indicating whether the value type is an invariant fixed /// value or one that's computed by a kernel-defined resolver function. enum ResolveKind { FIXED, COMPUTED }; - /// Type resolution function. Given input types and shapes, return output - /// type and shape. This function MAY may use the kernel state to decide - /// the output type based on the functionoptions. + /// Type resolution function. Given input types, return output type. This + /// function MAY may use the kernel state to decide the output type based on + /// the FunctionOptions. /// /// This function SHOULD _not_ be used to check for arity, that is to be /// performed one or more layers above. - using Resolver = - std::function(KernelContext*, const std::vector&)>; + typedef Result (*Resolver)(KernelContext*, const std::vector&); - /// \brief Output an exact type, but with shape determined by promoting the - /// shapes of the inputs (any ARRAY argument yields ARRAY). + /// \brief Output an exact type OutputType(std::shared_ptr type) // NOLINT implicit construction : kind_(FIXED), type_(std::move(type)) {} - /// \brief Output the exact type and shape provided by a ValueDescr - OutputType(ValueDescr descr); // NOLINT implicit construction - /// \brief Output a computed type depending on actual input types OutputType(Resolver resolver) // NOLINT implicit construction : kind_(COMPUTED), resolver_(std::move(resolver)) {} OutputType(const OutputType& other) { this->kind_ = other.kind_; - this->shape_ = other.shape_; this->type_ = other.type_; this->resolver_ = other.resolver_; } @@ -323,19 +282,17 @@ class ARROW_EXPORT OutputType { OutputType(OutputType&& other) { this->kind_ = other.kind_; this->type_ = std::move(other.type_); - this->shape_ = other.shape_; this->resolver_ = other.resolver_; } OutputType& operator=(const OutputType&) = default; OutputType& operator=(OutputType&&) = default; - /// \brief Return the shape and type of the expected output value of the - /// kernel given the value descriptors (shapes and types) of the input - /// arguments. The resolver may make use of state information kept in the - /// KernelContext. - Result Resolve(KernelContext* ctx, - const std::vector& args) const; + /// \brief Return the type of the expected output value of the kernel given + /// the input argument types. The resolver may make use of state information + /// kept in the KernelContext. + Result Resolve(KernelContext* ctx, + const std::vector& args) const; /// \brief The exact output value type for the FIXED kind. 
const std::shared_ptr& type() const; @@ -352,20 +309,12 @@ class ARROW_EXPORT OutputType { /// fixed/invariant or computed by a resolver. ResolveKind kind() const { return kind_; } - /// \brief If the shape is ANY, then Resolve will compute the shape based on - /// the input arguments. - ValueDescr::Shape shape() const { return shape_; } - private: ResolveKind kind_; // For FIXED resolution std::shared_ptr type_; - /// \brief The shape of the output type to return when using Resolve. If ANY - /// will promote the input shapes. - ValueDescr::Shape shape_ = ValueDescr::ANY; - // For COMPUTED resolution Resolver resolver_; }; @@ -388,7 +337,7 @@ class ARROW_EXPORT KernelSignature { /// \brief Return true if the signature if compatible with the list of input /// value descriptors. - bool MatchesInputs(const std::vector& descriptors) const; + bool MatchesInputs(const std::vector& types) const; /// \brief Returns true if the input types of each signature are /// equal. Well-formed functions should have a deterministic output type @@ -408,9 +357,10 @@ class ARROW_EXPORT KernelSignature { /// function arguments. const std::vector& in_types() const { return in_types_; } - /// \brief The output type for the kernel. Use Resolve to return the exact - /// output given input argument ValueDescrs, since many kernels' output types - /// depend on their input types (or their type metadata). + /// \brief The output type for the kernel. Use Resolve to return the + /// exact output given input argument types, since many kernels' + /// output types depend on their input types (or their type + /// metadata). const OutputType& out_type() const { return out_type_; } /// \brief Render a human-readable string representation @@ -493,12 +443,9 @@ struct KernelInitArgs { /// depend on the kernel's KernelSignature or other data contained there. const Kernel* kernel; - /// \brief The types and shapes of the input arguments that the kernel is + /// \brief The types of the input arguments that the kernel is /// about to be executed against. - /// - /// TODO: should this be const std::vector*? const-ref is being - /// used to avoid the cost of copying the struct into the args struct. - const std::vector& inputs; + const std::vector& inputs; /// \brief Opaque options specific to this kernel. May be nullptr for functions /// that do not require options. @@ -523,7 +470,7 @@ struct Kernel { std::move(init)) {} /// \brief The "signature" of the kernel containing the InputType input - /// argument validators and OutputType output type and shape resolver. + /// argument validators and OutputType output type resolver. std::shared_ptr signature; /// \brief Create a new KernelState for invocations of this kernel, e.g. to @@ -546,6 +493,9 @@ struct Kernel { /// contain multiple kernels with the same signature but different levels of SIMD, /// so that the most optimized kernel supported on a host's processor can be chosen. SimdLevel::type simd_level = SimdLevel::NONE; + + // Additional kernel-specific data + std::shared_ptr data; }; /// \brief The scalar kernel execution API that must be implemented for SCALAR @@ -555,8 +505,7 @@ struct Kernel { /// endeavor to write into pre-allocated memory if they are able, though for /// some kernels (e.g. in cases when a builder like StringBuilder) must be /// employed this may not be possible. -using ArrayKernelExec = - std::function; +typedef Status (*ArrayKernelExec)(KernelContext*, const ExecSpan&, ExecResult*); /// \brief Kernel data structure for implementations of ScalarFunction. 
In /// addition to the members found in Kernel, contains the null handling @@ -566,12 +515,11 @@ struct ScalarKernel : public Kernel { ScalarKernel(std::shared_ptr sig, ArrayKernelExec exec, KernelInit init = NULLPTR) - : Kernel(std::move(sig), init), exec(std::move(exec)) {} + : Kernel(std::move(sig), init), exec(exec) {} ScalarKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init = NULLPTR) - : Kernel(std::move(in_types), std::move(out_type), std::move(init)), - exec(std::move(exec)) {} + : Kernel(std::move(in_types), std::move(out_type), std::move(init)), exec(exec) {} /// \brief Perform a single invocation of this kernel. Depending on the /// implementation, it may only write into preallocated memory, while in some @@ -590,9 +538,6 @@ struct ScalarKernel : public Kernel { // bitmaps is a reasonable default NullHandling::type null_handling = NullHandling::INTERSECTION; MemAllocation::type mem_allocation = MemAllocation::PREALLOCATE; - - // Additional kernel-specific data - std::shared_ptr data; }; // ---------------------------------------------------------------------- @@ -615,13 +560,13 @@ struct VectorKernel : public Kernel { VectorKernel(std::vector in_types, OutputType out_type, ArrayKernelExec exec, KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR) : Kernel(std::move(in_types), std::move(out_type), std::move(init)), - exec(std::move(exec)), + exec(exec), finalize(std::move(finalize)) {} VectorKernel(std::shared_ptr sig, ArrayKernelExec exec, KernelInit init = NULLPTR, FinalizeFunc finalize = NULLPTR) : Kernel(std::move(sig), std::move(init)), - exec(std::move(exec)), + exec(exec), finalize(std::move(finalize)) {} /// \brief Perform a single invocation of this kernel. Any required state is diff --git a/cpp/src/arrow/compute/kernel_test.cc b/cpp/src/arrow/compute/kernel_test.cc index 2d427374426..d995cca354c 100644 --- a/cpp/src/arrow/compute/kernel_test.cc +++ b/cpp/src/arrow/compute/kernel_test.cc @@ -21,6 +21,7 @@ #include +#include "arrow/array/util.h" #include "arrow/compute/kernel.h" #include "arrow/status.h" #include "arrow/testing/gtest_util.h" @@ -35,8 +36,8 @@ namespace compute { TEST(TypeMatcher, SameTypeId) { std::shared_ptr matcher = match::SameTypeId(Type::DECIMAL); - ASSERT_TRUE(matcher->Matches(*decimal(12, 2))); - ASSERT_FALSE(matcher->Matches(*int8())); + ASSERT_TRUE(matcher->Matches(decimal(12, 2))); + ASSERT_FALSE(matcher->Matches(int8())); ASSERT_EQ("Type::DECIMAL128", matcher->ToString()); @@ -49,11 +50,11 @@ TEST(TypeMatcher, TimestampTypeUnit) { auto matcher = match::TimestampTypeUnit(TimeUnit::MILLI); auto matcher2 = match::Time32TypeUnit(TimeUnit::MILLI); - ASSERT_TRUE(matcher->Matches(*timestamp(TimeUnit::MILLI))); - ASSERT_TRUE(matcher->Matches(*timestamp(TimeUnit::MILLI, "utc"))); - ASSERT_FALSE(matcher->Matches(*timestamp(TimeUnit::SECOND))); - ASSERT_FALSE(matcher->Matches(*time32(TimeUnit::MILLI))); - ASSERT_TRUE(matcher2->Matches(*time32(TimeUnit::MILLI))); + ASSERT_TRUE(matcher->Matches(timestamp(TimeUnit::MILLI))); + ASSERT_TRUE(matcher->Matches(timestamp(TimeUnit::MILLI, "utc"))); + ASSERT_FALSE(matcher->Matches(timestamp(TimeUnit::SECOND))); + ASSERT_FALSE(matcher->Matches(time32(TimeUnit::MILLI))); + ASSERT_TRUE(matcher2->Matches(time32(TimeUnit::MILLI))); // Check ToString representation ASSERT_EQ("timestamp(s)", match::TimestampTypeUnit(TimeUnit::SECOND)->ToString()); @@ -75,43 +76,23 @@ TEST(InputType, AnyTypeConstructor) { // Check the ANY_TYPE ctors InputType ty; ASSERT_EQ(InputType::ANY_TYPE, 
ty.kind()); - ASSERT_EQ(ValueDescr::ANY, ty.shape()); - - ty = InputType(ValueDescr::SCALAR); - ASSERT_EQ(ValueDescr::SCALAR, ty.shape()); - - ty = InputType(ValueDescr::ARRAY); - ASSERT_EQ(ValueDescr::ARRAY, ty.shape()); } TEST(InputType, Constructors) { // Exact type constructor InputType ty1(int8()); ASSERT_EQ(InputType::EXACT_TYPE, ty1.kind()); - ASSERT_EQ(ValueDescr::ANY, ty1.shape()); AssertTypeEqual(*int8(), *ty1.type()); InputType ty1_implicit = int8(); ASSERT_TRUE(ty1.Equals(ty1_implicit)); - InputType ty1_array(int8(), ValueDescr::ARRAY); - ASSERT_EQ(ValueDescr::ARRAY, ty1_array.shape()); - - InputType ty1_scalar(int8(), ValueDescr::SCALAR); - ASSERT_EQ(ValueDescr::SCALAR, ty1_scalar.shape()); - // Same type id constructor InputType ty2(Type::DECIMAL); ASSERT_EQ(InputType::USE_TYPE_MATCHER, ty2.kind()); - ASSERT_EQ("any[Type::DECIMAL128]", ty2.ToString()); - ASSERT_TRUE(ty2.type_matcher().Matches(*decimal(12, 2))); - ASSERT_FALSE(ty2.type_matcher().Matches(*int16())); - - InputType ty2_array(Type::DECIMAL, ValueDescr::ARRAY); - ASSERT_EQ(ValueDescr::ARRAY, ty2_array.shape()); - - InputType ty2_scalar(Type::DECIMAL, ValueDescr::SCALAR); - ASSERT_EQ(ValueDescr::SCALAR, ty2_scalar.shape()); + ASSERT_EQ("Type::DECIMAL128", ty2.ToString()); + ASSERT_TRUE(ty2.type_matcher().Matches(decimal(12, 2))); + ASSERT_FALSE(ty2.type_matcher().Matches(int16())); // Implicit construction in a vector std::vector types = {int8(), InputType(Type::DECIMAL)}; @@ -131,69 +112,33 @@ TEST(InputType, Constructors) { ASSERT_TRUE(ty6.Equals(ty2)); // ToString - ASSERT_EQ("any[int8]", ty1.ToString()); - ASSERT_EQ("array[int8]", ty1_array.ToString()); - ASSERT_EQ("scalar[int8]", ty1_scalar.ToString()); - - ASSERT_EQ("any[Type::DECIMAL128]", ty2.ToString()); - ASSERT_EQ("array[Type::DECIMAL128]", ty2_array.ToString()); - ASSERT_EQ("scalar[Type::DECIMAL128]", ty2_scalar.ToString()); + ASSERT_EQ("int8", ty1.ToString()); + ASSERT_EQ("Type::DECIMAL128", ty2.ToString()); InputType ty7(match::TimestampTypeUnit(TimeUnit::MICRO)); - ASSERT_EQ("any[timestamp(us)]", ty7.ToString()); + ASSERT_EQ("timestamp(us)", ty7.ToString()); InputType ty8; - InputType ty9(ValueDescr::ANY); - InputType ty10(ValueDescr::ARRAY); - InputType ty11(ValueDescr::SCALAR); - ASSERT_EQ("any[any]", ty8.ToString()); - ASSERT_EQ("any[any]", ty9.ToString()); - ASSERT_EQ("array[any]", ty10.ToString()); - ASSERT_EQ("scalar[any]", ty11.ToString()); + ASSERT_EQ("any", ty8.ToString()); } TEST(InputType, Equals) { InputType t1 = int8(); InputType t2 = int8(); - InputType t3(int8(), ValueDescr::ARRAY); - InputType t3_i32(int32(), ValueDescr::ARRAY); - InputType t3_scalar(int8(), ValueDescr::SCALAR); - InputType t4(int8(), ValueDescr::ARRAY); - InputType t4_i32(int32(), ValueDescr::ARRAY); + InputType t3 = int32(); InputType t5(Type::DECIMAL); InputType t6(Type::DECIMAL); - InputType t7(Type::DECIMAL, ValueDescr::SCALAR); - InputType t7_i32(Type::INT32, ValueDescr::SCALAR); - InputType t8(Type::DECIMAL, ValueDescr::SCALAR); - InputType t8_i32(Type::INT32, ValueDescr::SCALAR); ASSERT_TRUE(t1.Equals(t2)); ASSERT_EQ(t1, t2); - - // ANY vs SCALAR ASSERT_NE(t1, t3); - ASSERT_EQ(t3, t4); - - // both ARRAY, but different type - ASSERT_NE(t3, t3_i32); - - // ARRAY vs SCALAR - ASSERT_NE(t3, t3_scalar); - - ASSERT_EQ(t3_i32, t4_i32); - ASSERT_FALSE(t1.Equals(t5)); ASSERT_NE(t1, t5); ASSERT_EQ(t5, t5); ASSERT_EQ(t5, t6); - ASSERT_NE(t5, t7); - ASSERT_EQ(t7, t8); - ASSERT_EQ(t7, t8); - ASSERT_NE(t7, t7_i32); - ASSERT_EQ(t7_i32, t8_i32); // NOTE: For the time being, we 
treat int32() and Type::INT32 as being // different. This could obviously be fixed later to make these equivalent @@ -208,9 +153,6 @@ TEST(InputType, Equals) { TEST(InputType, Hash) { InputType t0; - InputType t0_scalar(ValueDescr::SCALAR); - InputType t0_array(ValueDescr::ARRAY); - InputType t1 = int8(); InputType t2(Type::DECIMAL); @@ -218,36 +160,32 @@ TEST(InputType, Hash) { // same value, and whether the elements of the type are all incorporated into // the Hash ASSERT_EQ(t0.Hash(), t0.Hash()); - ASSERT_NE(t0.Hash(), t0_scalar.Hash()); - ASSERT_NE(t0.Hash(), t0_array.Hash()); - ASSERT_NE(t0_scalar.Hash(), t0_array.Hash()); - ASSERT_EQ(t1.Hash(), t1.Hash()); ASSERT_EQ(t2.Hash(), t2.Hash()); - ASSERT_NE(t0.Hash(), t1.Hash()); ASSERT_NE(t0.Hash(), t2.Hash()); ASSERT_NE(t1.Hash(), t2.Hash()); } TEST(InputType, Matches) { - InputType ty1 = int8(); - - ASSERT_TRUE(ty1.Matches(ValueDescr::Scalar(int8()))); - ASSERT_TRUE(ty1.Matches(ValueDescr::Array(int8()))); - ASSERT_TRUE(ty1.Matches(ValueDescr::Any(int8()))); - ASSERT_FALSE(ty1.Matches(ValueDescr::Any(int16()))); - - InputType ty2(Type::DECIMAL); - ASSERT_TRUE(ty2.Matches(ValueDescr::Scalar(decimal(12, 2)))); - ASSERT_TRUE(ty2.Matches(ValueDescr::Array(decimal(12, 2)))); - ASSERT_FALSE(ty2.Matches(ValueDescr::Any(float64()))); - - InputType ty3(int64(), ValueDescr::SCALAR); - ASSERT_FALSE(ty3.Matches(ValueDescr::Array(int64()))); - ASSERT_TRUE(ty3.Matches(ValueDescr::Scalar(int64()))); - ASSERT_FALSE(ty3.Matches(ValueDescr::Scalar(int32()))); - ASSERT_FALSE(ty3.Matches(ValueDescr::Any(int64()))); + InputType input1 = int8(); + + ASSERT_TRUE(input1.Matches(int8())); + ASSERT_TRUE(input1.Matches(int8())); + ASSERT_FALSE(input1.Matches(int16())); + + InputType input2(Type::DECIMAL); + ASSERT_TRUE(input2.Matches(decimal(12, 2))); + + auto ty2 = decimal(12, 2); + auto ty3 = float64(); + ASSERT_OK_AND_ASSIGN(std::shared_ptr arr2, MakeArrayOfNull(ty2, 1)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr arr3, MakeArrayOfNull(ty3, 1)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr scalar2, arr2->GetScalar(0)); + ASSERT_TRUE(input2.Matches(Datum(arr2))); + ASSERT_TRUE(input2.Matches(Datum(scalar2))); + ASSERT_FALSE(input2.Matches(ty3)); + ASSERT_FALSE(input2.Matches(arr3)); } // ---------------------------------------------------------------------- @@ -259,14 +197,14 @@ TEST(OutputType, Constructors) { AssertTypeEqual(*int8(), *ty1.type()); auto DummyResolver = [](KernelContext*, - const std::vector& args) -> Result { - return ValueDescr(int32(), GetBroadcastShape(args)); + const std::vector& args) -> Result { + return int32(); }; OutputType ty2(DummyResolver); ASSERT_EQ(OutputType::COMPUTED, ty2.kind()); - ASSERT_OK_AND_ASSIGN(ValueDescr out_descr2, ty2.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), out_descr2); + ASSERT_OK_AND_ASSIGN(TypeHolder out_type2, ty2.Resolve(nullptr, {})); + ASSERT_EQ(out_type2, int32()); // Copy constructor OutputType ty3 = ty1; @@ -275,8 +213,8 @@ TEST(OutputType, Constructors) { OutputType ty4 = ty2; ASSERT_EQ(OutputType::COMPUTED, ty4.kind()); - ASSERT_OK_AND_ASSIGN(ValueDescr out_descr4, ty4.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), out_descr4); + ASSERT_OK_AND_ASSIGN(TypeHolder out_type4, ty4.Resolve(nullptr, {})); + ASSERT_EQ(out_type4, int32()); // Move constructor OutputType ty5 = std::move(ty1); @@ -285,8 +223,8 @@ TEST(OutputType, Constructors) { OutputType ty6 = std::move(ty4); ASSERT_EQ(OutputType::COMPUTED, ty6.kind()); - ASSERT_OK_AND_ASSIGN(ValueDescr out_descr6, ty6.Resolve(nullptr, 
{})); - ASSERT_EQ(ValueDescr::Array(int32()), out_descr6); + ASSERT_OK_AND_ASSIGN(TypeHolder out_type6, ty6.Resolve(nullptr, {})); + ASSERT_EQ(out_type6, int32()); // ToString @@ -296,89 +234,63 @@ TEST(OutputType, Constructors) { } TEST(OutputType, Resolve) { - // Check shape promotion rules for FIXED kind OutputType ty1(int32()); - ASSERT_OK_AND_ASSIGN(ValueDescr descr, ty1.Resolve(nullptr, {})); - ASSERT_EQ(ValueDescr::Array(int32()), descr); + ASSERT_OK_AND_ASSIGN(TypeHolder result, ty1.Resolve(nullptr, {})); + ASSERT_EQ(result, int32()); - ASSERT_OK_AND_ASSIGN(descr, - ty1.Resolve(nullptr, {ValueDescr(int8(), ValueDescr::SCALAR)})); - ASSERT_EQ(ValueDescr::Scalar(int32()), descr); + ASSERT_OK_AND_ASSIGN(result, ty1.Resolve(nullptr, {int8()})); + ASSERT_EQ(result, int32()); - ASSERT_OK_AND_ASSIGN(descr, - ty1.Resolve(nullptr, {ValueDescr(int8(), ValueDescr::SCALAR), - ValueDescr(int8(), ValueDescr::ARRAY)})); - ASSERT_EQ(ValueDescr::Array(int32()), descr); + ASSERT_OK_AND_ASSIGN(result, ty1.Resolve(nullptr, {int8(), int8()})); + ASSERT_EQ(result, int32()); - OutputType ty2([](KernelContext*, const std::vector& args) { - return ValueDescr(args[0].type, GetBroadcastShape(args)); - }); + auto resolver = [](KernelContext*, + const std::vector& args) -> Result { + return args[0]; + }; + OutputType ty2(resolver); - ASSERT_OK_AND_ASSIGN(descr, ty2.Resolve(nullptr, {ValueDescr::Array(utf8())})); - ASSERT_EQ(ValueDescr::Array(utf8()), descr); + ASSERT_OK_AND_ASSIGN(result, ty2.Resolve(nullptr, {utf8()})); + ASSERT_EQ(result, utf8()); // Type resolver that returns an error OutputType ty3( - [](KernelContext* ctx, const std::vector& args) -> Result { + [](KernelContext* ctx, const std::vector& types) -> Result { // NB: checking the value types versus the function arity should be // validated elsewhere, so this is just for illustration purposes - if (args.size() == 0) { + if (types.size() == 0) { return Status::Invalid("Need at least one argument"); } - return ValueDescr(args[0]); + return types[0]; }); ASSERT_RAISES(Invalid, ty3.Resolve(nullptr, {})); - // Type resolver that returns ValueDescr::ANY and needs type promotion + // Type resolver that returns a fixed value OutputType ty4( - [](KernelContext* ctx, const std::vector& args) -> Result { + [](KernelContext* ctx, const std::vector& types) -> Result { return int32(); }); - ASSERT_OK_AND_ASSIGN(descr, ty4.Resolve(nullptr, {ValueDescr::Array(int8())})); - ASSERT_EQ(ValueDescr::Array(int32()), descr); - ASSERT_OK_AND_ASSIGN(descr, ty4.Resolve(nullptr, {ValueDescr::Scalar(int8())})); - ASSERT_EQ(ValueDescr::Scalar(int32()), descr); -} - -TEST(OutputType, ResolveDescr) { - ValueDescr d1 = ValueDescr::Scalar(int32()); - ValueDescr d2 = ValueDescr::Array(int32()); - - OutputType ty1(d1); - OutputType ty2(d2); - - ASSERT_EQ(ValueDescr::SCALAR, ty1.shape()); - ASSERT_EQ(ValueDescr::ARRAY, ty2.shape()); - - { - ASSERT_OK_AND_ASSIGN(ValueDescr descr, ty1.Resolve(nullptr, {})); - ASSERT_EQ(d1, descr); - } - - { - ASSERT_OK_AND_ASSIGN(ValueDescr descr, ty2.Resolve(nullptr, {})); - ASSERT_EQ(d2, descr); - } + ASSERT_OK_AND_ASSIGN(result, ty4.Resolve(nullptr, {int8()})); + ASSERT_EQ(result, int32()); + ASSERT_OK_AND_ASSIGN(result, ty4.Resolve(nullptr, {int8()})); + ASSERT_EQ(result, int32()); } // ---------------------------------------------------------------------- // KernelSignature TEST(KernelSignature, Basics) { - // (any[int8], scalar[decimal]) -> utf8 - std::vector in_types({int8(), InputType(Type::DECIMAL, ValueDescr::SCALAR)}); + // (int8, 
decimal) -> utf8 + std::vector in_types({int8(), InputType(Type::DECIMAL)}); OutputType out_type(utf8()); KernelSignature sig(in_types, out_type); ASSERT_EQ(2, sig.in_types().size()); ASSERT_TRUE(sig.in_types()[0].type()->Equals(*int8())); - ASSERT_TRUE(sig.in_types()[0].Matches(ValueDescr::Scalar(int8()))); - ASSERT_TRUE(sig.in_types()[0].Matches(ValueDescr::Array(int8()))); - - ASSERT_TRUE(sig.in_types()[1].Matches(ValueDescr::Scalar(decimal(12, 2)))); - ASSERT_FALSE(sig.in_types()[1].Matches(ValueDescr::Array(decimal(12, 2)))); + ASSERT_TRUE(sig.in_types()[0].Matches(int8())); + ASSERT_TRUE(sig.in_types()[1].Matches(decimal(12, 2))); } TEST(KernelSignature, Equals) { @@ -393,10 +305,6 @@ TEST(KernelSignature, Equals) { KernelSignature sig4_copy({int8(), int16()}, utf8()); KernelSignature sig5({int8(), int16(), int32()}, utf8()); - // Differ in shape - KernelSignature sig6({ValueDescr::Scalar(int8())}, utf8()); - KernelSignature sig7({ValueDescr::Array(int8())}, utf8()); - ASSERT_EQ(sig1, sig1); ASSERT_EQ(sig2, sig3); @@ -408,8 +316,6 @@ TEST(KernelSignature, Equals) { // Match first 2 args, but not third ASSERT_NE(sig4, sig5); - - ASSERT_NE(sig6, sig7); } TEST(KernelSignature, VarArgsEquals) { @@ -441,40 +347,32 @@ TEST(KernelSignature, MatchesInputs) { ASSERT_TRUE(sig1.MatchesInputs({})); ASSERT_FALSE(sig1.MatchesInputs({int8()})); - // (any[int8], any[decimal]) -> boolean + // (int8, decimal) -> boolean KernelSignature sig2({int8(), InputType(Type::DECIMAL)}, boolean()); ASSERT_FALSE(sig2.MatchesInputs({})); ASSERT_FALSE(sig2.MatchesInputs({int8()})); ASSERT_TRUE(sig2.MatchesInputs({int8(), decimal(12, 2)})); - ASSERT_TRUE(sig2.MatchesInputs( - {ValueDescr::Scalar(int8()), ValueDescr::Scalar(decimal(12, 2))})); - ASSERT_TRUE( - sig2.MatchesInputs({ValueDescr::Array(int8()), ValueDescr::Array(decimal(12, 2))})); - // (scalar[int8], array[int32]) -> boolean - KernelSignature sig3({ValueDescr::Scalar(int8()), ValueDescr::Array(int32())}, - boolean()); + // (int8, int32) -> boolean + KernelSignature sig3({int8(), int32()}, boolean()); ASSERT_FALSE(sig3.MatchesInputs({})); // Unqualified, these are ANY type and do not match because the kernel // requires a scalar and an array - ASSERT_FALSE(sig3.MatchesInputs({int8(), int32()})); - ASSERT_TRUE( - sig3.MatchesInputs({ValueDescr::Scalar(int8()), ValueDescr::Array(int32())})); - ASSERT_FALSE( - sig3.MatchesInputs({ValueDescr::Array(int8()), ValueDescr::Array(int32())})); + ASSERT_TRUE(sig3.MatchesInputs({int8(), int32()})); + ASSERT_FALSE(sig3.MatchesInputs({int8(), int16()})); } TEST(KernelSignature, VarArgsMatchesInputs) { { KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true); - std::vector args = {int8()}; + std::vector args = {int8()}; ASSERT_TRUE(sig.MatchesInputs(args)); - args.push_back(ValueDescr::Scalar(int8())); - args.push_back(ValueDescr::Array(int8())); + args.push_back(int8()); + args.push_back(int8()); ASSERT_TRUE(sig.MatchesInputs(args)); args.push_back(int32()); ASSERT_FALSE(sig.MatchesInputs(args)); @@ -482,10 +380,10 @@ TEST(KernelSignature, VarArgsMatchesInputs) { { KernelSignature sig({int8(), utf8()}, utf8(), /*is_varargs=*/true); - std::vector args = {int8()}; + std::vector args = {int8()}; ASSERT_TRUE(sig.MatchesInputs(args)); - args.push_back(ValueDescr::Scalar(utf8())); - args.push_back(ValueDescr::Array(utf8())); + args.push_back(utf8()); + args.push_back(utf8()); ASSERT_TRUE(sig.MatchesInputs(args)); args.push_back(int32()); ASSERT_FALSE(sig.MatchesInputs(args)); @@ -493,23 +391,25 @@ TEST(KernelSignature, 
 }
 
 TEST(KernelSignature, ToString) {
-  std::vector<InputType> in_types = {InputType(int8(), ValueDescr::SCALAR),
-                                     InputType(Type::DECIMAL, ValueDescr::ARRAY),
+  std::vector<InputType> in_types = {InputType(int8()), InputType(Type::DECIMAL),
                                      InputType(utf8())};
   KernelSignature sig(in_types, utf8());
-  ASSERT_EQ("(scalar[int8], array[Type::DECIMAL128], any[string]) -> string",
-            sig.ToString());
-
-  OutputType out_type([](KernelContext*, const std::vector<ValueDescr>& args) {
-    return Status::Invalid("NYI");
-  });
-  KernelSignature sig2({int8(), InputType(Type::DECIMAL)}, out_type);
-  ASSERT_EQ("(any[int8], any[Type::DECIMAL128]) -> computed", sig2.ToString());
+  ASSERT_EQ("(int8, Type::DECIMAL128, string) -> string", sig.ToString());
+
+  OutputType out_type(
+      [](KernelContext*, const std::vector<TypeHolder>& args) -> Result<TypeHolder> {
+        return Status::Invalid("NYI");
+      });
+  KernelSignature sig2({int8(), Type::DECIMAL}, out_type);
+  ASSERT_EQ("(int8, Type::DECIMAL128) -> computed", sig2.ToString());
 }
 
 TEST(KernelSignature, VarArgsToString) {
   KernelSignature sig({int8()}, utf8(), /*is_varargs=*/true);
-  ASSERT_EQ("varargs[any[int8]] -> string", sig.ToString());
+  ASSERT_EQ("varargs[int8*] -> string", sig.ToString());
+
+  KernelSignature sig2({utf8(), int8()}, utf8(), /*is_varargs=*/true);
+  ASSERT_EQ("varargs[string, int8*] -> string", sig2.ToString());
 }
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 661b6a4edb1..57cee87f00d 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -195,7 +195,7 @@ Result> CountDistinctInit(KernelContext* ctx,
 
 template
 void AddCountDistinctKernel(InputType type, ScalarAggregateFunction* func) {
-  AddAggKernel(KernelSignature::Make({type}, ValueDescr::Scalar(int64())),
+  AddAggKernel(KernelSignature::Make({type}, int64()),
               CountDistinctInit, func);
 }
 
@@ -252,7 +252,7 @@ struct MeanImplDefault : public MeanImpl {
 
 Result> SumInit(KernelContext* ctx, const KernelInitArgs& args) {
   SumLikeInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
      static_cast(*args.options));
   return visitor.Create();
 }
 
@@ -260,7 +260,7 @@ Result> SumInit(KernelContext* ctx,
 Result> MeanInit(KernelContext* ctx, const KernelInitArgs& args) {
   MeanKernelInit visitor(
-      ctx, args.inputs[0].type,
+      ctx, args.inputs[0].GetSharedPtr(),
      static_cast(*args.options));
   return visitor.Create();
 }
 
@@ -277,7 +277,7 @@ struct ProductImpl : public ScalarAggregator {
   using ProductType = typename TypeTraits::CType;
   using OutputType = typename TypeTraits::ScalarType;
 
-  explicit ProductImpl(const std::shared_ptr& out_type,
+  explicit ProductImpl(std::shared_ptr out_type,
                       const ScalarAggregateOptions& options)
      : out_type(out_type),
        options(options),
@@ -356,10 +356,10 @@ struct NullProductImpl : public NullImpl {
 
 struct ProductInit {
   std::unique_ptr state;
   KernelContext* ctx;
-  const std::shared_ptr& type;
+  std::shared_ptr type;
   const ScalarAggregateOptions& options;
 
-  ProductInit(KernelContext* ctx, const std::shared_ptr& type,
+  ProductInit(KernelContext* ctx, std::shared_ptr type,
              const ScalarAggregateOptions& options)
      : ctx(ctx), type(type), options(options) {}
 
@@ -402,7 +402,7 @@ struct ProductInit {
 
   static Result> Init(KernelContext* ctx, const KernelInitArgs& args) {
-    ProductInit visitor(ctx, args.inputs[0].type,
+    ProductInit visitor(ctx, args.inputs[0].GetSharedPtr(),
                        static_cast(*args.options));
    return visitor.Create();
  }
@@ -413,10 +413,10 @@
struct ProductInit { Result> MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) { - ARROW_ASSIGN_OR_RAISE(auto out_type, + ARROW_ASSIGN_OR_RAISE(TypeHolder out_type, args.kernel->signature->out_type().Resolve(ctx, args.inputs)); MinMaxInitState visitor( - ctx, *args.inputs[0].type, std::move(out_type.type), + ctx, *args.inputs[0], out_type.GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } @@ -425,14 +425,7 @@ Result> MinMaxInit(KernelContext* ctx, template void AddMinOrMaxAggKernel(ScalarAggregateFunction* func, ScalarAggregateFunction* min_max_func) { - auto sig = KernelSignature::Make( - {InputType(ValueDescr::ANY)}, - OutputType([](KernelContext*, - const std::vector& descrs) -> Result { - // any[T] -> scalar[T] - return ValueDescr::Scalar(descrs.front().type); - })); - + auto sig = KernelSignature::Make({InputType::Any()}, FirstType); auto init = [min_max_func]( KernelContext* ctx, const KernelInitArgs& args) -> Result> { @@ -775,8 +768,7 @@ void AddBasicAggKernels(KernelInit init, SimdLevel::type simd_level) { for (const auto& ty : types) { // array[InT] -> scalar[OutT] - auto sig = - KernelSignature::Make({InputType::Array(ty->id())}, ValueDescr::Scalar(out_ty)); + auto sig = KernelSignature::Make({ty->id()}, out_ty); AddAggKernel(std::move(sig), init, func, simd_level); } } @@ -786,9 +778,7 @@ void AddScalarAggKernels(KernelInit init, std::shared_ptr out_ty, ScalarAggregateFunction* func) { for (const auto& ty : types) { - // scalar[InT] -> scalar[OutT] - auto sig = - KernelSignature::Make({InputType::Scalar(ty->id())}, ValueDescr::Scalar(out_ty)); + auto sig = KernelSignature::Make({ty->id()}, out_ty); AddAggKernel(std::move(sig), init, func, SimdLevel::NONE); } } @@ -804,17 +794,17 @@ void AddArrayScalarAggKernels(KernelInit init, namespace { -Result MinMaxType(KernelContext*, const std::vector& descrs) { - // any[T] -> scalar[struct] - auto ty = descrs.front().type; - return ValueDescr::Scalar(struct_({field("min", ty), field("max", ty)})); +Result MinMaxType(KernelContext*, const std::vector& types) { + // T -> struct + auto ty = types.front().GetSharedPtr(); + return struct_({field("min", ty), field("max", ty)}); } } // namespace void AddMinMaxKernel(KernelInit init, internal::detail::GetTypeId get_id, ScalarAggregateFunction* func, SimdLevel::type simd_level) { - auto sig = KernelSignature::Make({InputType(get_id.id)}, OutputType(MinMaxType)); + auto sig = KernelSignature::Make({InputType(get_id.id)}, MinMaxType); AddAggKernel(std::move(sig), init, func, simd_level); } @@ -828,13 +818,6 @@ void AddMinMaxKernels(KernelInit init, namespace { -Result ScalarFirstType(KernelContext*, - const std::vector& descrs) { - ValueDescr result = descrs.front(); - result.shape = ValueDescr::SCALAR; - return result; -} - const FunctionDoc count_doc{"Count the number of null / non-null values", ("By default, only non-null values are counted.\n" "This can be changed through CountOptions."), @@ -922,8 +905,7 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { // Takes any input, outputs int64 scalar InputType any_input; - AddAggKernel(KernelSignature::Make({any_input}, ValueDescr::Scalar(int64())), CountInit, - func.get()); + AddAggKernel(KernelSignature::Make({any_input}, int64()), CountInit, func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); func = std::make_shared( @@ -935,12 +917,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { func = std::make_shared("sum", Arity::Unary(), sum_doc, 
&default_scalar_aggregate_options); AddArrayScalarAggKernels(SumInit, {boolean()}, uint64(), func.get()); - AddAggKernel( - KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ScalarFirstType)), - SumInit, func.get(), SimdLevel::NONE); - AddAggKernel( - KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ScalarFirstType)), - SumInit, func.get(), SimdLevel::NONE); + AddAggKernel(KernelSignature::Make({Type::DECIMAL128}, FirstType), SumInit, func.get(), + SimdLevel::NONE); + AddAggKernel(KernelSignature::Make({Type::DECIMAL256}, FirstType), SumInit, func.get(), + SimdLevel::NONE); AddArrayScalarAggKernels(SumInit, SignedIntTypes(), int64(), func.get()); AddArrayScalarAggKernels(SumInit, UnsignedIntTypes(), uint64(), func.get()); AddArrayScalarAggKernels(SumInit, FloatingPointTypes(), float64(), func.get()); @@ -965,12 +945,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { &default_scalar_aggregate_options); AddArrayScalarAggKernels(MeanInit, {boolean()}, float64(), func.get()); AddArrayScalarAggKernels(MeanInit, NumericTypes(), float64(), func.get()); - AddAggKernel( - KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ScalarFirstType)), - MeanInit, func.get(), SimdLevel::NONE); - AddAggKernel( - KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ScalarFirstType)), - MeanInit, func.get(), SimdLevel::NONE); + AddAggKernel(KernelSignature::Make({Type::DECIMAL128}, FirstType), MeanInit, func.get(), + SimdLevel::NONE); + AddAggKernel(KernelSignature::Make({Type::DECIMAL256}, FirstType), MeanInit, func.get(), + SimdLevel::NONE); AddArrayScalarAggKernels(MeanInit, {null()}, float64(), func.get()); // Add the SIMD variants for mean #if defined(ARROW_HAVE_RUNTIME_AVX2) @@ -1028,12 +1006,10 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { AddArrayScalarAggKernels(ProductInit::Init, UnsignedIntTypes(), uint64(), func.get()); AddArrayScalarAggKernels(ProductInit::Init, FloatingPointTypes(), float64(), func.get()); - AddAggKernel( - KernelSignature::Make({InputType(Type::DECIMAL128)}, OutputType(ScalarFirstType)), - ProductInit::Init, func.get(), SimdLevel::NONE); - AddAggKernel( - KernelSignature::Make({InputType(Type::DECIMAL256)}, OutputType(ScalarFirstType)), - ProductInit::Init, func.get(), SimdLevel::NONE); + AddAggKernel(KernelSignature::Make({Type::DECIMAL128}, FirstType), ProductInit::Init, + func.get(), SimdLevel::NONE); + AddAggKernel(KernelSignature::Make({Type::DECIMAL256}, FirstType), ProductInit::Init, + func.get(), SimdLevel::NONE); AddArrayScalarAggKernels(ProductInit::Init, {null()}, int64(), func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc index 00e3e2e5fd4..03b45107eec 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc @@ -37,7 +37,7 @@ struct MeanImplAvx2 : public MeanImpl { Result> SumInitAvx2(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( - ctx, args.inputs[0].type, + ctx, args.inputs[0].GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } @@ -45,7 +45,7 @@ Result> SumInitAvx2(KernelContext* ctx, Result> MeanInitAvx2(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( - ctx, args.inputs[0].type, + ctx, args.inputs[0].GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } @@ -55,10 +55,10 @@ Result> 
MeanInitAvx2(KernelContext* ctx, Result> MinMaxInitAvx2(KernelContext* ctx, const KernelInitArgs& args) { - ARROW_ASSIGN_OR_RAISE(auto out_type, + ARROW_ASSIGN_OR_RAISE(TypeHolder out_type, args.kernel->signature->out_type().Resolve(ctx, args.inputs)); MinMaxInitState visitor( - ctx, *args.inputs[0].type, std::move(out_type.type), + ctx, *args.inputs[0], out_type.GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc index 8c10eb19b07..0d66ed2ec3e 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc @@ -37,7 +37,7 @@ struct MeanImplAvx512 : public MeanImpl { Result> SumInitAvx512(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( - ctx, args.inputs[0].type, + ctx, args.inputs[0].GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } @@ -45,7 +45,7 @@ Result> SumInitAvx512(KernelContext* ctx, Result> MeanInitAvx512(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( - ctx, args.inputs[0].type, + ctx, args.inputs[0].GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } @@ -55,10 +55,10 @@ Result> MeanInitAvx512(KernelContext* ctx, Result> MinMaxInitAvx512(KernelContext* ctx, const KernelInitArgs& args) { - ARROW_ASSIGN_OR_RAISE(auto out_type, + ARROW_ASSIGN_OR_RAISE(TypeHolder out_type, args.kernel->signature->out_type().Resolve(ctx, args.inputs)); MinMaxInitState visitor( - ctx, *args.inputs[0].type, std::move(out_type.type), + ctx, *args.inputs[0], out_type.GetSharedPtr(), static_cast(*args.options)); return visitor.Create(); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h index a5b473793a9..6645e1a76bc 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h @@ -65,8 +65,7 @@ struct SumImpl : public ScalarAggregator { using SumCType = typename TypeTraits::CType; using OutputType = typename TypeTraits::ScalarType; - SumImpl(const std::shared_ptr& out_type, - const ScalarAggregateOptions& options_) + SumImpl(std::shared_ptr out_type, const ScalarAggregateOptions& options_) : out_type(out_type), options(options_) {} Status Consume(KernelContext*, const ExecBatch& batch) override { @@ -216,10 +215,10 @@ template