diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 1178c658c48..5df5e748f39 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -42,6 +42,7 @@ install(FILES
   rle-encoding.h
   sse-util.h
   stl.h
+  type_traits.h
   visibility.h
   DESTINATION include/arrow/util)
 
diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc
index 5a66d7e85ad..92bdcb5fc08 100644
--- a/cpp/src/arrow/util/bit-util-test.cc
+++ b/cpp/src/arrow/util/bit-util-test.cc
@@ -28,7 +28,6 @@
 
 #include "arrow/buffer.h"
 #include "arrow/memory_pool.h"
-#include "arrow/status.h"
 #include "arrow/test-util.h"
 #include "arrow/util/bit-stream-utils.h"
 #include "arrow/util/bit-util.h"
@@ -334,4 +333,36 @@ TEST(BitStreamUtil, ZigZag) {
   TestZigZag(-std::numeric_limits<int32_t>::max());
 }
 
+// Native -> little-endian -> native must reproduce the input value.
+TEST(BitUtil, RoundTripLittleEndianTest) {
+  const uint64_t native = 0xFF;
+
+  // Only a big-endian host swaps bytes, moving 0xFF into the top byte.
+#if ARROW_LITTLE_ENDIAN
+  const uint64_t wire_expected = native;
+#else
+  const uint64_t wire_expected = std::numeric_limits<uint64_t>::max() << 56;
+#endif
+
+  const uint64_t wire = BitUtil::ToLittleEndian(native);
+  ASSERT_EQ(wire_expected, wire);
+  ASSERT_EQ(native, BitUtil::FromLittleEndian(wire));
+}
+
+// Native -> big-endian -> native must reproduce the input value.
+TEST(BitUtil, RoundTripBigEndianTest) {
+  const uint64_t native = 0xFF;
+
+  // Only a little-endian host swaps bytes, moving 0xFF into the top byte.
+#if ARROW_LITTLE_ENDIAN
+  const uint64_t wire_expected = std::numeric_limits<uint64_t>::max() << 56;
+#else
+  const uint64_t wire_expected = native;
+#endif
+
+  const uint64_t wire = BitUtil::ToBigEndian(native);
+  ASSERT_EQ(wire_expected, wire);
+  ASSERT_EQ(native, BitUtil::FromBigEndian(wire));
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 2509de21ffb..8043f90ccdf 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -56,6 +56,7 @@
 #include <vector>
 
 #include "arrow/util/macros.h"
+#include "arrow/util/type_traits.h"
 #include "arrow/util/visibility.h"
 
 #ifdef ARROW_USE_SSE
@@ -305,7 +306,7 @@ static inline uint32_t ByteSwap(uint32_t value) {
   return static_cast<uint32_t>(ARROW_BYTE_SWAP32(value));
 }
 static inline int16_t ByteSwap(int16_t value) {
-  constexpr int16_t m = static_cast<int16_t>(0xff);
+  constexpr auto m = static_cast<int16_t>(0xff);
   return static_cast<int16_t>(((value >> 8) & m) | ((value & m) << 8));
 }
 static inline uint16_t ByteSwap(uint16_t value) {
@@ -331,8 +332,8 @@ static inline void ByteSwap(void* dst, const void* src, int len) {
       break;
   }
 
-  uint8_t* d = reinterpret_cast<uint8_t*>(dst);
-  const uint8_t* s = reinterpret_cast<const uint8_t*>(src);
+  auto d = reinterpret_cast<uint8_t*>(dst);
+  auto s = reinterpret_cast<const uint8_t*>(src);
   for (int i = 0; i < len; ++i) {
     d[i] = s[len - i - 1];
   }
@@ -341,36 +342,68 @@ static inline void ByteSwap(void* dst, const void* src, int len) {
 /// Converts to big endian format (if not already in big endian) from the
 /// machine's native endian format.
 #if ARROW_LITTLE_ENDIAN
-static inline int64_t ToBigEndian(int64_t value) { return ByteSwap(value); }
-static inline uint64_t ToBigEndian(uint64_t value) { return ByteSwap(value); }
-static inline int32_t ToBigEndian(int32_t value) { return ByteSwap(value); }
-static inline uint32_t ToBigEndian(uint32_t value) { return ByteSwap(value); }
-static inline int16_t ToBigEndian(int16_t value) { return ByteSwap(value); }
-static inline uint16_t ToBigEndian(uint16_t value) { return ByteSwap(value); }
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T ToBigEndian(T value) {
+  return ByteSwap(value);
+}
+
+/// Converts to little endian format; the host is little endian, so this is a no-op.
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T ToLittleEndian(T value) {
+  return value;
+}
 #else
-static inline int64_t ToBigEndian(int64_t val) { return val; }
-static inline uint64_t ToBigEndian(uint64_t val) { return val; }
-static inline int32_t ToBigEndian(int32_t val) { return val; }
-static inline uint32_t ToBigEndian(uint32_t val) { return val; }
-static inline int16_t ToBigEndian(int16_t val) { return val; }
-static inline uint16_t ToBigEndian(uint16_t val) { return val; }
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T ToBigEndian(T value) {
+  return value;
+}
+
+/// Converts to little endian format; the host is big endian, so bytes are swapped.
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T ToLittleEndian(T value) {
+  return ByteSwap(value);
+}
 #endif
 
 /// Converts from big endian format to the machine's native endian format.
 #if ARROW_LITTLE_ENDIAN
-static inline int64_t FromBigEndian(int64_t value) { return ByteSwap(value); }
-static inline uint64_t FromBigEndian(uint64_t value) { return ByteSwap(value); }
-static inline int32_t FromBigEndian(int32_t value) { return ByteSwap(value); }
-static inline uint32_t FromBigEndian(uint32_t value) { return ByteSwap(value); }
-static inline int16_t FromBigEndian(int16_t value) { return ByteSwap(value); }
-static inline uint16_t FromBigEndian(uint16_t value) { return ByteSwap(value); }
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T FromBigEndian(T value) {
+  return ByteSwap(value);
+}
+
+/// Converts from little endian format; the host is little endian, so this is a no-op.
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T FromLittleEndian(T value) {
+  return value;
+}
 #else
-static inline int64_t FromBigEndian(int64_t val) { return val; }
-static inline uint64_t FromBigEndian(uint64_t val) { return val; }
-static inline int32_t FromBigEndian(int32_t val) { return val; }
-static inline uint32_t FromBigEndian(uint32_t val) { return val; }
-static inline int16_t FromBigEndian(int16_t val) { return val; }
-static inline uint16_t FromBigEndian(uint16_t val) { return val; }
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T FromBigEndian(T value) {
+  return value;
+}
+
+/// Converts from little endian format; the host is big endian, so bytes are swapped.
+template <typename T,
+          typename =
+              EnableIfIsOneOf<T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t>>
+static inline T FromLittleEndian(T value) {
+  return ByteSwap(value);
+}
 #endif
 
 // Logical right shift for signed integer types
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 49d5c024950..7196b252c5b 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -27,6 +27,7 @@
 #pragma intrinsic(_BitScanReverse)
 #endif
 
+#include "arrow/util/bit-util.h"
 #include "arrow/util/decimal.h"
 #include "arrow/util/logging.h"
 
@@ -41,11 +42,13 @@ Decimal128::Decimal128(const std::string& str) : Decimal128() {
 }
 
 Decimal128::Decimal128(const uint8_t* bytes)
-    : Decimal128(reinterpret_cast<const int64_t*>(bytes)[0],
-                 reinterpret_cast<const uint64_t*>(bytes)[1]) {}
+    : Decimal128(BitUtil::FromLittleEndian(reinterpret_cast<const int64_t*>(bytes)[1]),
+                 BitUtil::FromLittleEndian(reinterpret_cast<const uint64_t*>(bytes)[0])) {
+}
 
 std::array<uint8_t, 16> Decimal128::ToBytes() const {
-  const uint64_t raw[] = {static_cast<uint64_t>(high_bits_), low_bits_};
+  const uint64_t raw[] = {BitUtil::ToLittleEndian(low_bits_),
+                          BitUtil::ToLittleEndian(static_cast<uint64_t>(high_bits_))};
   const auto* raw_data = reinterpret_cast<const uint8_t*>(raw);
   std::array<uint8_t, 16> out{{0}};
   std::copy(raw_data, raw_data + out.size(), out.begin());
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 58496a874f1..ba252bee480 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -53,7 +53,8 @@ class ARROW_EXPORT Decimal128 {
   /// \brief Parse the number from a base 10 string representation.
   explicit Decimal128(const std::string& value);
 
-  /// \brief Create an Decimal128 from an array of bytes
+  /// \brief Create a Decimal128 from an array of bytes. Bytes are assumed to be in
+  /// little-endian byte order.
   explicit Decimal128(const uint8_t* bytes);
 
   /// \brief Negate the current value
@@ -104,7 +105,7 @@ class ARROW_EXPORT Decimal128 {
   /// \brief Get the low bits of the two's complement representation of the number.
   uint64_t low_bits() const { return low_bits_; }
 
-  /// \brief Return the raw bytes of the value.
+  /// \brief Return the raw bytes of the value in little-endian byte order.
   std::array<uint8_t, 16> ToBytes() const;
 
   /// \brief Convert the Decimal128 value to a base 10 decimal string with the given
diff --git a/cpp/src/arrow/util/type_traits.h b/cpp/src/arrow/util/type_traits.h
new file mode 100644
index 00000000000..c05309af826
--- /dev/null
+++ b/cpp/src/arrow/util/type_traits.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_TYPE_TRAITS_H
+#define ARROW_UTIL_TYPE_TRAITS_H
+
+#include <type_traits>
+
+namespace arrow {
+
+/// \brief Metafunction to allow checking if a type matches any of another set of types
+template <typename...>
+struct IsOneOf : std::false_type {};  // Base case: nothing has matched
+
+/// Recursive case: true when T == U or T matches one of the remaining Args.
+template <typename T, typename U, typename... Args>
+struct IsOneOf<T, U, Args...>
+    : std::integral_constant<bool,
+                             std::is_same<T, U>::value || IsOneOf<T, Args...>::value> {};
+
+/// \brief Shorthand for IsOneOf + std::enable_if; names T only if T is one of Args
+template <typename T, typename... Args>
+using EnableIfIsOneOf = typename std::enable_if<IsOneOf<T, Args...>::value, T>::type;
+
+}  // namespace arrow
+
+#endif  // ARROW_UTIL_TYPE_TRAITS_H
diff --git a/format/Layout.md b/format/Layout.md
index 3c21dbc0dc2..ebf93821aab 100644
--- a/format/Layout.md
+++ b/format/Layout.md
@@ -41,9 +41,8 @@ concepts, here is a small glossary to help disambiguate.
   or a fully-specified nested type. When we say slot we mean a relative type
   value, not necessarily any physical storage region.
 * Logical type: A data type that is implemented using some relative (physical)
-  type. For example, a Decimal value stored in 16 bytes could be stored in a
-  primitive array with slot size 16 bytes. Similarly, strings can be stored as
-  `List<1-byte>`.
+  type. For example, Decimal values are stored as 16 bytes in a
+  fixed-byte-size array. Similarly, strings can be stored as `List<1-byte>`.
 * Parent and child arrays: names to express relationships between physical
   value arrays in a nested type structure. For example, a `List<T>`-type parent
   array has a T-type array as its child (see more on lists below).
diff --git a/format/Metadata.md b/format/Metadata.md
index 80ca08ae13f..893b0a4742f 100644
--- a/format/Metadata.md
+++ b/format/Metadata.md
@@ -391,7 +391,8 @@ logical type, which have no children) and 3 buffers:
 
 ### Decimal
 
-TBD
+Decimals are represented as 2's complement 128-bit (16-byte) signed integers
+in little-endian byte order.
 
 ### Timestamp