From fddf466b1186738b5a3d8f7614e244a1ae4d9b2a Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Mon, 23 Oct 2017 21:32:55 -0400
Subject: [PATCH 01/11] Stub out unit test for timestamp unit change

Change-Id: Ib64bf5d57bfec374d7b7e6d2fd5954a73b471a56
---
 cpp/src/arrow/compute/cast.h          |  5 ++++-
 cpp/src/arrow/compute/compute-test.cc | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h
index 7a07512b2ad..c1c7ac60598 100644
--- a/cpp/src/arrow/compute/cast.h
+++ b/cpp/src/arrow/compute/cast.h
@@ -34,9 +34,12 @@ class FunctionContext;
 class UnaryKernel;
 
 struct CastOptions {
-  CastOptions() : allow_int_overflow(false) {}
+  CastOptions()
+      : allow_int_overflow(false),
+        allow_time_truncate(false) {}
 
   bool allow_int_overflow;
+  bool allow_time_truncate;
 };
 
 /// \since 0.7.0
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 8a595178d05..6e17236133a 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -270,6 +270,27 @@ TEST_F(TestCast, ToIntDowncastUnsafe) {
                                                     options);
 }
 
+TEST_F(TestCast, TimestampChangeUnit) {
+  auto CheckTimestampCast = [this](TimeUnit::type from_unit,
+                                   TimeUnit::type to_unit,
+                                   const std::vector<int64_t>& from_values,
+                                   const std::vector<int64_t>& to_values,
+                                   const std::vector<bool>& is_valid) {
+    CastOptions options;
+
+    CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
+        timestamp(from_unit), from_values, is_valid,
+        timestamp(to_unit), to_values, options);
+  };
+
+  vector<bool> is_valid = {true, false, true, true, true};
+
+  vector<int64_t> v1 = {0, 100, 200, 1, 2};
+  vector<int64_t> e1 = {0, 100000, 200000, 1000, 2000};
+
+  CheckTimestampCast(TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid);
+}
+
 TEST_F(TestCast, ToDouble) {
   CastOptions options;
   vector<bool> is_valid = {true, false, true, true, true};

From a0401363ddf2141cf7c535fdf621889e98dba8fc Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 15:48:19 -0400
Subject: [PATCH 02/11] Placeholders for time32/64/timestamp cast functions

Change-Id: I71d6783acf208ec4b66f91ff6f5a857ac19b80ac
---
 cpp/src/arrow/compute/cast.cc         | 65 ++++++++++++++++++++++++---
 cpp/src/arrow/compute/cast.h          |  4 +-
 cpp/src/arrow/compute/compute-test.cc |  7 ++-
 cpp/src/arrow/type.h                  | 12 +++--
 4 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index e8bbfd34706..4a45faf412d 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -68,6 +68,19 @@
 namespace arrow {
 namespace compute {
 
+namespace {
+
+void CopyData(const ArrayData& input, ArrayData* output) {
+  auto in_data = input.data();
+  output->length = in_data->length;
+  output->null_count = input.null_count();
+  output->buffers = in_data->buffers;
+  output->offset = in_data->offset;
+  output->child_data = in_data->child_data;
+}
+
+}  // namespace
+
 // ----------------------------------------------------------------------
 // Zero copy casts
 
@@ -77,7 +90,9 @@ struct is_zero_copy_cast {
 };
 
 template <typename O, typename I>
-struct is_zero_copy_cast<O, I, typename std::enable_if<std::is_same<I, O>::value>::type> {
+struct is_zero_copy_cast<
+    O, I, typename std::enable_if<std::is_same<I, O>::value &&
+                                  !std::is_base_of<ParametricType, O>::value>::type> {
   static constexpr bool value = true;
 };
 
@@ -102,10 +117,7 @@ template <typename O, typename I>
 struct CastFunctor<O, I, typename std::enable_if<is_zero_copy_cast<O, I>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
-    auto in_data = input.data();
-    output->null_count = input.null_count();
-    output->buffers = in_data->buffers;
-    output->child_data = in_data->child_data;
+    CopyData(*input.data(), output);
   }
 };
 
@@ -259,6 +271,49 @@ struct CastFunctor<O, I,
   }
 };
 
+// ----------------------------------------------------------------------
+// From one timestamp to another
+
+template <>
+struct CastFunctor<TimestampType, TimestampType> {
+  void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
+                  ArrayData* output) {
+    // If units are the same, zero copy, otherwise convert
+
+
+  }
+};
+
+// ----------------------------------------------------------------------
+// From one time32 or time64 to another
+
+template <typename O, typename I>
+struct CastFunctor<O, I,
+                   typename std::enable_if<std::is_same<I, O>::value &&
+                                           std::is_base_of<TimeType, O>::value>::type> {
+  void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
+                  ArrayData* output) {
+    // If units are the same, zero copy, otherwise convert
+  }
+};
+
+// ----------------------------------------------------------------------
+// Between date32 and date64
+
+template <>
+struct CastFunctor<Date64Type, Date32Type> {
+  void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
+                  ArrayData* output) {
+  }
+};
+
+template <>
+struct CastFunctor<Date32Type, Date64Type> {
+  void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
+                  ArrayData* output) {
+  }
+};
+
 // ----------------------------------------------------------------------
 // Dictionary to other things
 
diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h
index c1c7ac60598..d7bde20d607 100644
--- a/cpp/src/arrow/compute/cast.h
+++ b/cpp/src/arrow/compute/cast.h
@@ -34,9 +34,7 @@ class FunctionContext;
 class UnaryKernel;
 
 struct CastOptions {
-  CastOptions()
-      : allow_int_overflow(false),
-        allow_time_truncate(false) {}
+  CastOptions() : allow_int_overflow(false), allow_time_truncate(false) {}
 
   bool allow_int_overflow;
   bool allow_time_truncate;
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 6e17236133a..5bc89647336 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -271,16 +271,15 @@ TEST_F(TestCast, ToIntDowncastUnsafe) {
 }
 
 TEST_F(TestCast, TimestampChangeUnit) {
-  auto CheckTimestampCast = [this](TimeUnit::type from_unit,
-                                   TimeUnit::type to_unit,
+  auto CheckTimestampCast = [this](TimeUnit::type from_unit, TimeUnit::type to_unit,
                                    const std::vector<int64_t>& from_values,
                                    const std::vector<int64_t>& to_values,
                                    const std::vector<bool>& is_valid) {
     CastOptions options;
 
     CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
-        timestamp(from_unit), from_values, is_valid,
-        timestamp(to_unit), to_values, options);
+        timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values,
+        options);
   };
 
   vector<bool> is_valid = {true, false, true, true, true};
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 443828423e7..878fdf29efe 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -228,7 +228,11 @@ class ARROW_EXPORT FloatingPoint : public Number {
   virtual Precision precision() const = 0;
 };
 
-class ARROW_EXPORT NestedType : public DataType {
+/// \class ParametricType
+/// \brief A superclass for types having additional metadata
+class ParametricType {};
+
+class ARROW_EXPORT NestedType : public DataType, public ParametricType {
  public:
   using DataType::DataType;
 };
@@ -444,7 +448,7 @@ class ARROW_EXPORT BinaryType : public DataType, public NoExtraMeta {
 };
 
 // BinaryType type is represents lists of 1-byte values.
-class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType {
+class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
  public:
   static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
 
@@ -611,7 +615,7 @@ static inline std::ostream& operator<<(std::ostream& os, TimeUnit::type unit) {
   return os;
 }
 
-class ARROW_EXPORT TimeType : public FixedWidthType {
+class ARROW_EXPORT TimeType : public FixedWidthType, public ParametricType {
  public:
   TimeUnit::type unit() const { return unit_; }
 
@@ -650,7 +654,7 @@ class ARROW_EXPORT Time64Type : public TimeType {
   std::string name() const override { return "time64"; }
 };
 
-class ARROW_EXPORT TimestampType : public FixedWidthType {
+class ARROW_EXPORT TimestampType : public FixedWidthType, public ParametricType {
  public:
   using Unit = TimeUnit;
 

From 884f6d25bd4e85365202f9a1affc2fcaa38b9c85 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 20:56:36 -0400
Subject: [PATCH 03/11] Baby steps

Change-Id: I1defe0410b2aada7eebf77f4e61ea34634836498
---
 cpp/src/arrow/compute/compute-test.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 5bc89647336..4ba39fe1d4d 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -105,6 +105,11 @@ class TestCast : public ComputeFixture, public TestBase {
       ArrayFromVector<OutType, O_TYPE>(out_type, out_values, &expected);
     }
     CheckPass(*input, *expected, out_type, options);
+
+    // Check a sliced variant
+    if (input->length() > 1) {
+      CheckPass(*input->Slice(1), *expected->Slice(1), out_type, options);
+    }
   }
 };
 

From 569dfb81f98e97b55463b82f41e01819ef021da4 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 21:48:02 -0400
Subject: [PATCH 04/11] Fix existing cast tests with sliced arrays

Change-Id: Ic3dd593eb8876f4171cdd2737d022b3731a0dae9
---
 cpp/src/arrow/compute/cast.cc         | 67 ++++++++++++++-------------
 cpp/src/arrow/compute/compute-test.cc |  6 +--
 cpp/src/arrow/test-util.h             | 21 +++++----
 3 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 4a45faf412d..aaf40f78994 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -68,9 +68,14 @@
 namespace arrow {
 namespace compute {
 
+template <typename T>
+inline const T* GetValuesAs(const ArrayData& data, int i) {
+  return reinterpret_cast<const T*>(data.buffers[1]->data()) + data.offset;
+}
+
 namespace {
 
-void CopyData(const ArrayData& input, ArrayData* output) {
+void CopyData(const Array& input, ArrayData* output) {
   auto in_data = input.data();
   output->length = in_data->length;
   output->null_count = input.null_count();
@@ -117,7 +122,7 @@ template <typename O, typename I>
 struct CastFunctor<O, I, typename std::enable_if<is_zero_copy_cast<O, I>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
-    CopyData(*input.data(), output);
+    CopyData(input, output);
   }
 };
 
@@ -131,6 +136,7 @@ struct CastFunctor<T, NullType, typename std::enable_if<
                   ArrayData* output) {
     // Simply initialize data to 0
     auto buf = output->buffers[1];
+    DCHECK_EQ(output->offset, 0);
     memset(buf->mutable_data(), 0, buf->size());
   }
 };
@@ -151,12 +157,16 @@ struct CastFunctor<T, BooleanType,
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
     using c_type = typename T::c_type;
-    const uint8_t* data = input.data()->buffers[1]->data();
-    auto out = reinterpret_cast<c_type*>(output->buffers[1]->mutable_data());
     constexpr auto kOne = static_cast<c_type>(1);
     constexpr auto kZero = static_cast<c_type>(0);
+
+    auto in_data = input.data();
+    internal::BitmapReader bit_reader(in_data->buffers[1]->data(), in_data->offset,
+                                      in_data->length);
+    auto out = reinterpret_cast<c_type*>(output->buffers[1]->mutable_data());
     for (int64_t i = 0; i < input.length(); ++i) {
-      *out++ = BitUtil::GetBit(data, i) ? kOne : kZero;
+      *out++ = bit_reader.IsSet() ? kOne : kZero;
+      bit_reader.Next();
     }
   }
 };
@@ -201,7 +211,9 @@ struct CastFunctor<O, I, typename std::enable_if<std::is_same<BooleanType, O>::v
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
     using in_type = typename I::c_type;
-    auto in_data = reinterpret_cast<const in_type*>(input.data()->buffers[1]->data());
+    DCHECK_EQ(output->offset, 0);
+
+    const in_type* in_data = GetValuesAs<in_type>(*input.data(), 1);
     uint8_t* out_data = reinterpret_cast<uint8_t*>(output->buffers[1]->mutable_data());
     for (int64_t i = 0; i < input.length(); ++i) {
       BitUtil::SetBitTo(out_data, i, (*in_data++) != 0);
@@ -216,12 +228,11 @@ struct CastFunctor<O, I,
                   ArrayData* output) {
     using in_type = typename I::c_type;
     using out_type = typename O::c_type;
+    DCHECK_EQ(output->offset, 0);
 
     auto in_offset = input.offset();
 
-    const auto& input_buffers = input.data()->buffers;
-
-    auto in_data = reinterpret_cast<const in_type*>(input_buffers[1]->data()) + in_offset;
+    const in_type* in_data = GetValuesAs<in_type>(*input.data(), 1);
     auto out_data = reinterpret_cast<out_type*>(output->buffers[1]->mutable_data());
 
     if (!options.allow_int_overflow) {
@@ -229,14 +240,15 @@ struct CastFunctor<O, I,
       constexpr in_type kMin = static_cast<in_type>(std::numeric_limits<out_type>::min());
 
       if (input.null_count() > 0) {
-        const uint8_t* is_valid = input_buffers[0]->data();
-        int64_t is_valid_offset = in_offset;
+        internal::BitmapReader is_valid_reader(input.data()->buffers[0]->data(),
+                                               in_offset, input.length());
         for (int64_t i = 0; i < input.length(); ++i) {
-          if (ARROW_PREDICT_FALSE(BitUtil::GetBit(is_valid, is_valid_offset++) &&
+          if (ARROW_PREDICT_FALSE(is_valid_reader.IsSet() &&
                                   (*in_data > kMax || *in_data < kMin))) {
             ctx->SetStatus(Status::Invalid("Integer value out of bounds"));
           }
           *out_data++ = static_cast<out_type>(*in_data++);
+          is_valid_reader.Next();
         }
       } else {
         for (int64_t i = 0; i < input.length(); ++i) {
@@ -263,7 +275,7 @@ struct CastFunctor<O, I,
     using in_type = typename I::c_type;
     using out_type = typename O::c_type;
 
-    auto in_data = reinterpret_cast<const in_type*>(input.data()->buffers[1]->data());
+    const in_type* in_data = GetValuesAs<in_type>(*input.data(), 1);
     auto out_data = reinterpret_cast<out_type*>(output->buffers[1]->mutable_data());
     for (int64_t i = 0; i < input.length(); ++i) {
       *out_data++ = static_cast<out_type>(*in_data++);
@@ -279,8 +291,6 @@ struct CastFunctor<TimestampType, TimestampType> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
     // If units are the same, zero copy, otherwise convert
-
-
   }
 };
 
@@ -303,15 +313,13 @@ struct CastFunctor<O, I,
 template <>
 struct CastFunctor<Date64Type, Date32Type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
-                  ArrayData* output) {
-  }
+                  ArrayData* output) {}
 };
 
 template <>
 struct CastFunctor<Date32Type, Date64Type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
-                  ArrayData* output) {
-  }
+                  ArrayData* output) {}
 };
 
 // ----------------------------------------------------------------------
@@ -326,9 +334,8 @@ void UnpackFixedSizeBinaryDictionary(FunctionContext* ctx, const Array& indices,
   internal::BitmapReader valid_bits_reader(indices.null_bitmap_data(), indices.offset(),
                                            indices.length());
 
-  const index_c_type* in =
-      reinterpret_cast<const index_c_type*>(indices.data()->buffers[1]->data()) +
-      indices.offset();
+  const index_c_type* in = GetValuesAs<index_c_type>(*indices.data(), 1);
+
   uint8_t* out = output->buffers[1]->mutable_data();
   int32_t byte_width =
       static_cast<const FixedSizeBinaryType&>(*output->type).byte_width();
@@ -391,9 +398,7 @@ Status UnpackBinaryDictionary(FunctionContext* ctx, const Array& indices,
   internal::BitmapReader valid_bits_reader(indices.null_bitmap_data(), indices.offset(),
                                            indices.length());
 
-  const index_c_type* in =
-      reinterpret_cast<const index_c_type*>(indices.data()->buffers[1]->data()) +
-      indices.offset();
+  const index_c_type* in = GetValuesAs<index_c_type>(*indices.data(), 1);
   for (int64_t i = 0; i < indices.length(); ++i) {
     if (valid_bits_reader.IsSet()) {
       int32_t length;
@@ -464,9 +469,7 @@ void UnpackPrimitiveDictionary(const Array& indices, const c_type* dictionary,
   internal::BitmapReader valid_bits_reader(indices.null_bitmap_data(), indices.offset(),
                                            indices.length());
 
-  const index_c_type* in =
-      reinterpret_cast<const index_c_type*>(indices.data()->buffers[1]->data()) +
-      indices.offset();
+  const index_c_type* in = GetValuesAs<index_c_type>(*indices.data(), 1);
   for (int64_t i = 0; i < indices.length(); ++i) {
     if (valid_bits_reader.IsSet()) {
       out[i] = dictionary[in[i]];
@@ -491,9 +494,8 @@ struct CastFunctor<T, DictionaryType,
     DCHECK(values_type.Equals(*output->type))
         << "Dictionary type: " << values_type << " target type: " << (*output->type);
 
-    auto dictionary =
-        reinterpret_cast<const c_type*>(type.dictionary()->data()->buffers[1]->data()) +
-        type.dictionary()->offset();
+    const c_type* dictionary = GetValuesAs<c_type>(*type.dictionary()->data(), 1);
+
     auto out = reinterpret_cast<c_type*>(output->buffers[1]->mutable_data());
     const Array& indices = *dict_array.indices();
     switch (indices.type()->id()) {
@@ -536,6 +538,9 @@ static Status AllocateIfNotPreallocated(FunctionContext* ctx, const Array& input
     int64_t bitmap_size = BitUtil::BytesForBits(length);
     RETURN_NOT_OK(ctx->Allocate(bitmap_size, &validity_bitmap));
     memset(validity_bitmap->mutable_data(), 0, bitmap_size);
+  } else if (input.offset() != 0) {
+    RETURN_NOT_OK(CopyBitmap(ctx->memory_pool(), validity_bitmap->data(), input.offset(),
+                             length, &validity_bitmap));
   }
 
   if (out->buffers.size() == 2) {
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 4ba39fe1d4d..8cb4ea93891 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -68,7 +68,7 @@ class TestCast : public ComputeFixture, public TestBase {
                  const std::shared_ptr<DataType>& out_type, const CastOptions& options) {
     std::shared_ptr<Array> result;
     ASSERT_OK(Cast(&ctx_, input, out_type, options, &result));
-    AssertArraysEqual(expected, *result);
+    ASSERT_ARRAYS_EQUAL(expected, *result);
   }
 
   template <typename InType, typename I_TYPE>
@@ -360,7 +360,7 @@ TEST_F(TestCast, FromNull) {
   ASSERT_EQ(length, result->null_count());
 
   // OK to look at bitmaps
-  AssertArraysEqual(*result, *result);
+  ASSERT_ARRAYS_EQUAL(*result, *result);
 }
 
 TEST_F(TestCast, PreallocatedMemory) {
@@ -398,7 +398,7 @@ TEST_F(TestCast, PreallocatedMemory) {
   std::shared_ptr<Array> expected;
   ArrayFromVector<Int64Type, int64_t>(int64(), is_valid, e1, &expected);
 
-  AssertArraysEqual(*expected, *result);
+  ASSERT_ARRAYS_EQUAL(*expected, *result);
 }
 
 template <typename TestType>
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index 83ebdea4a85..044fb9476ca 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -281,15 +281,20 @@ Status MakeArray(const std::vector<uint8_t>& valid_bytes, const std::vector<T>&
   return builder->Finish(out);
 }
 
-void AssertArraysEqual(const Array& expected, const Array& actual) {
-  if (!actual.Equals(expected)) {
-    std::stringstream pp_result;
-    std::stringstream pp_expected;
+#define ASSERT_ARRAYS_EQUAL(LEFT, RIGHT)                                               \
+  do {                                                                                 \
+    if (!(LEFT).Equals((RIGHT))) {                                                     \
+      std::stringstream pp_result;                                                     \
+      std::stringstream pp_expected;                                                   \
+                                                                                       \
+      EXPECT_OK(PrettyPrint(RIGHT, 0, &pp_result));                                    \
+      EXPECT_OK(PrettyPrint(LEFT, 0, &pp_expected));                                   \
+      FAIL() << "Got: \n" << pp_result.str() << "\nExpected: \n" << pp_expected.str(); \
+    }                                                                                  \
+  } while (false)
 
-    EXPECT_OK(PrettyPrint(actual, 0, &pp_result));
-    EXPECT_OK(PrettyPrint(expected, 0, &pp_expected));
-    FAIL() << "Got: \n" << pp_result.str() << "\nExpected: \n" << pp_expected.str();
-  }
+void AssertArraysEqual(const Array& expected, const Array& actual) {
+  ASSERT_ARRAYS_EQUAL(expected, actual);
 }
 
 #define ASSERT_BATCHES_EQUAL(LEFT, RIGHT)    \

From 1bde233eac874af5c42f550f1bb92e3af959f35e Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 22:30:38 -0400
Subject: [PATCH 05/11] Cast table for timestamps

Change-Id: I6852192011b257119bc5c8832607f4d9ba191073
---
 cpp/src/arrow/compute/cast.cc | 55 +++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index aaf40f78994..97293404260 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -25,6 +25,7 @@
 #include <sstream>
 #include <string>
 #include <type_traits>
+#include <utility>
 
 #include "arrow/array.h"
 #include "arrow/buffer.h"
@@ -286,11 +287,65 @@ struct CastFunctor<O, I,
 // ----------------------------------------------------------------------
 // From one timestamp to another
 
+template <typename T>
+inline void ShiftTime(FunctionContext* ctx, const CastOptions& options,
+                      const bool is_multiply, const T factor,
+                      const Array& input, ArrayData* output) {
+  const int64_t* in_data = GetValuesAs<int64_t>(*input.data(), 1);
+  auto out_data = reinterpret_cast<int64_t*>(output->buffers[1]->mutable_data());
+
+  if (is_multiply) {
+    for (int64_t i = 0; i < input.length(); i++) {
+      out_data[i] = in_data[i] * factor;
+    }
+  } else {
+    if (options.allow_time_truncate) {
+      for (int64_t i = 0; i < input.length(); i++) {
+        out_data[i] = in_data[i] / factor;
+      }
+    } else {
+      for (int64_t i = 0; i < input.length(); i++) {
+        out_data[i] = in_data[i] / factor;
+        if (input.IsValid(i) && (out_data[i] * factor != in_data[i])) {
+          std::stringstream ss;
+          ss << "Casting from " << input.type()->ToString()
+             << " to " << output->type->ToString()
+             << " would lose data: " << in_data[i];
+          ctx->SetStatus(Status::Invalid(ss.str()));
+          break;
+        }
+      }
+    }
+  }
+}  // namespace
+
 template <>
 struct CastFunctor<TimestampType, TimestampType> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
     // If units are the same, zero copy, otherwise convert
+    const auto& in_type = static_cast<const TimestampType&>(*input.type());
+    const auto& out_type = static_cast<const TimestampType&>(*output->type);
+
+    if (in_type.unit() == out_type.unit()) {
+      CopyData(input, output);
+      return;
+    }
+
+    // {is_multiply, factor}
+    static const std::pair<bool, int64_t> kConversionTable[4][4] = {
+        {{true, 1}, {true, 1000}, {true, 1000000}, {true, 1000000000L}},     // SECOND
+        {{false, 1000}, {true, 1}, {true, 1000}, {true, 1000000}},           // MILLI
+        {{false, 1000000}, {false, 1000}, {true, 1}, {true, 1000}},          // MICRO
+        {{false, 1000000000L}, {false, 1000000}, {false, 1000}, {true, 1}},  // NANO
+    };
+
+    std::pair<bool, int64_t> conversion =
+        kConversionTable[static_cast<int>(in_type.unit())]
+                        [static_cast<int>(out_type.unit())];
+
+    ShiftTime<int64_t>(ctx, options, conversion.first, conversion.second,
+                       input, output);
   }
 };
 

From 6441a9549d1067d1f30265e36c12c5b8ae3e29ae Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 22:53:59 -0400
Subject: [PATCH 06/11] Write unit tests for timestamp to timestamp casts

Change-Id: Ied9180ef02a6835662911faabed70e986729ec86
---
 cpp/src/arrow/compute/cast.cc         | 40 +++++++++++----
 cpp/src/arrow/compute/compute-test.cc | 72 +++++++++++++++++++++++++--
 2 files changed, 97 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 97293404260..ff080f442e2 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -319,6 +319,18 @@ inline void ShiftTime(FunctionContext* ctx, const CastOptions& options,
   }
 }  // namespace
 
+namespace {
+
+// {is_multiply, factor}
+const std::pair<bool, int64_t> kTimeConversionTable[4][4] = {
+  {{true, 1}, {true, 1000}, {true, 1000000}, {true, 1000000000L}},     // SECOND
+  {{false, 1000}, {true, 1}, {true, 1000}, {true, 1000000}},           // MILLI
+  {{false, 1000000}, {false, 1000}, {true, 1}, {true, 1000}},          // MICRO
+  {{false, 1000000000L}, {false, 1000000}, {false, 1000}, {true, 1}},  // NANO
+};
+
+}  // namespace
+
 template <>
 struct CastFunctor<TimestampType, TimestampType> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
@@ -332,17 +344,9 @@ struct CastFunctor<TimestampType, TimestampType> {
       return;
     }
 
-    // {is_multiply, factor}
-    static const std::pair<bool, int64_t> kConversionTable[4][4] = {
-        {{true, 1}, {true, 1000}, {true, 1000000}, {true, 1000000000L}},     // SECOND
-        {{false, 1000}, {true, 1}, {true, 1000}, {true, 1000000}},           // MILLI
-        {{false, 1000000}, {false, 1000}, {true, 1}, {true, 1000}},          // MICRO
-        {{false, 1000000000L}, {false, 1000000}, {false, 1000}, {true, 1}},  // NANO
-    };
-
     std::pair<bool, int64_t> conversion =
-        kConversionTable[static_cast<int>(in_type.unit())]
-                        [static_cast<int>(out_type.unit())];
+        kTimeConversionTable[static_cast<int>(in_type.unit())]
+      [static_cast<int>(out_type.unit())];
 
     ShiftTime<int64_t>(ctx, options, conversion.first, conversion.second,
                        input, output);
@@ -358,7 +362,23 @@ struct CastFunctor<O, I,
                                            std::is_base_of<TimeType, O>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
+    using T = typename O::c_type;
+
     // If units are the same, zero copy, otherwise convert
+    const auto& in_type = static_cast<const O&>(*input.type());
+    const auto& out_type = static_cast<const O&>(*output->type);
+
+    if (in_type.unit() == out_type.unit()) {
+      CopyData(input, output);
+      return;
+    }
+
+    std::pair<bool, int64_t> conversion =
+        kTimeConversionTable[static_cast<int>(in_type.unit())]
+      [static_cast<int>(out_type.unit())];
+
+    ShiftTime<T>(ctx, options, conversion.first, static_cast<T>(conversion.second),
+                 input, output);
   }
 };
 
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 8cb4ea93891..4ead432c33d 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -275,13 +275,14 @@ TEST_F(TestCast, ToIntDowncastUnsafe) {
                                                     options);
 }
 
-TEST_F(TestCast, TimestampChangeUnit) {
-  auto CheckTimestampCast = [this](TimeUnit::type from_unit, TimeUnit::type to_unit,
+TEST_F(TestCast, TimestampToTimestamp) {
+  CastOptions options;
+
+  auto CheckTimestampCast = [this](const CastOptions& options,
+                                   TimeUnit::type from_unit, TimeUnit::type to_unit,
                                    const std::vector<int64_t>& from_values,
                                    const std::vector<int64_t>& to_values,
                                    const std::vector<bool>& is_valid) {
-    CastOptions options;
-
     CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
         timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values,
         options);
@@ -289,12 +290,73 @@ TEST_F(TestCast, TimestampChangeUnit) {
 
   vector<bool> is_valid = {true, false, true, true, true};
 
+  // Multiply promotions
   vector<int64_t> v1 = {0, 100, 200, 1, 2};
   vector<int64_t> e1 = {0, 100000, 200000, 1000, 2000};
+  CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid);
+
+  vector<int64_t> v2 = {0, 100, 200, 1, 2};
+  vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
+  CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::MICRO, v2, e2, is_valid);
+
+  vector<int64_t> v3 = {0, 100, 200, 1, 2};
+  vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
+  CheckTimestampCast(options, TimeUnit::SECOND, TimeUnit::NANO, v3, e3, is_valid);
 
-  CheckTimestampCast(TimeUnit::SECOND, TimeUnit::MILLI, v1, e1, is_valid);
+  vector<int64_t> v4 = {0, 100, 200, 1, 2};
+  vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
+  CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::MICRO, v4, e4, is_valid);
+
+  vector<int64_t> v5 = {0, 100, 200, 1, 2};
+  vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
+  CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::NANO, v5, e5, is_valid);
+
+  vector<int64_t> v6 = {0, 100, 200, 1, 2};
+  vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
+  CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::NANO, v6, e6, is_valid);
+
+  // Zero copy
+  std::shared_ptr<Array> arr;
+  vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
+  ArrayFromVector<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), is_valid,
+                                          v7, &arr);
+  CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND));
+
+  // Divide, truncate
+  vector<int64_t> v8 = {0, 100123, 200456, 1123, 2456};
+  vector<int64_t> e8 = {0, 100, 200, 1, 2};
+
+  options.allow_time_truncate = true;
+  CheckTimestampCast(options, TimeUnit::MILLI, TimeUnit::SECOND, v8, e8, is_valid);
+  CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::MILLI, v8, e8, is_valid);
+  CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MICRO, v8, e8, is_valid);
+
+  vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
+  vector<int64_t> e9 = {0, 100, 200, 1, 2};
+  CheckTimestampCast(options, TimeUnit::MICRO, TimeUnit::SECOND, v9, e9, is_valid);
+  CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::MILLI, v9, e9, is_valid);
+
+  vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
+  vector<int64_t> e10 = {0, 100, 200, 1, 2};
+  CheckTimestampCast(options, TimeUnit::NANO, TimeUnit::SECOND, v10, e10, is_valid);
+
+  // Disallow truncate, failures
+  options.allow_time_truncate = false;
+  CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v8, is_valid,
+                            timestamp(TimeUnit::SECOND), options);
+  CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v8, is_valid,
+                            timestamp(TimeUnit::MILLI), options);
+  CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v8, is_valid,
+                            timestamp(TimeUnit::SECOND), options);
+  CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v9, is_valid,
+                            timestamp(TimeUnit::SECOND), options);
+  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v9, is_valid,
+                            timestamp(TimeUnit::MILLI), options);
+  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v10, is_valid,
+                            timestamp(TimeUnit::SECOND), options);
 }
 
+
 TEST_F(TestCast, ToDouble) {
   CastOptions options;
   vector<bool> is_valid = {true, false, true, true, true};

From 4911b09f50b21ea639d918fdb306edca7d27a92c Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 23:29:06 -0400
Subject: [PATCH 07/11] Add time casts

Change-Id: Id4613af068fc31371ac530920f2938d9a4231416
---
 cpp/src/arrow/compute/cast.cc         |  35 +++++----
 cpp/src/arrow/compute/compute-test.cc | 101 +++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index ff080f442e2..c462d05a9aa 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -287,25 +287,25 @@ struct CastFunctor<O, I,
 // ----------------------------------------------------------------------
 // From one timestamp to another
 
-template <typename T>
+template <typename in_type, typename out_type>
 inline void ShiftTime(FunctionContext* ctx, const CastOptions& options,
-                      const bool is_multiply, const T factor,
+                      const bool is_multiply, const int64_t factor,
                       const Array& input, ArrayData* output) {
-  const int64_t* in_data = GetValuesAs<int64_t>(*input.data(), 1);
-  auto out_data = reinterpret_cast<int64_t*>(output->buffers[1]->mutable_data());
+  const in_type* in_data = GetValuesAs<in_type>(*input.data(), 1);
+  auto out_data = reinterpret_cast<out_type*>(output->buffers[1]->mutable_data());
 
   if (is_multiply) {
     for (int64_t i = 0; i < input.length(); i++) {
-      out_data[i] = in_data[i] * factor;
+      out_data[i] = static_cast<out_type>(in_data[i] * factor);
     }
   } else {
     if (options.allow_time_truncate) {
       for (int64_t i = 0; i < input.length(); i++) {
-        out_data[i] = in_data[i] / factor;
+        out_data[i] = static_cast<out_type>(in_data[i] / factor);
       }
     } else {
       for (int64_t i = 0; i < input.length(); i++) {
-        out_data[i] = in_data[i] / factor;
+        out_data[i] = static_cast<out_type>(in_data[i] / factor);
         if (input.IsValid(i) && (out_data[i] * factor != in_data[i])) {
           std::stringstream ss;
           ss << "Casting from " << input.type()->ToString()
@@ -348,8 +348,8 @@ struct CastFunctor<TimestampType, TimestampType> {
         kTimeConversionTable[static_cast<int>(in_type.unit())]
       [static_cast<int>(out_type.unit())];
 
-    ShiftTime<int64_t>(ctx, options, conversion.first, conversion.second,
-                       input, output);
+    ShiftTime<int64_t, int64_t>(ctx, options, conversion.first, conversion.second,
+                                input, output);
   }
 };
 
@@ -358,11 +358,12 @@ struct CastFunctor<TimestampType, TimestampType> {
 
 template <typename O, typename I>
 struct CastFunctor<O, I,
-                   typename std::enable_if<std::is_same<I, O>::value &&
+                   typename std::enable_if<std::is_base_of<TimeType, I>::value &&
                                            std::is_base_of<TimeType, O>::value>::type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
                   ArrayData* output) {
-    using T = typename O::c_type;
+    using in_t = typename I::c_type;
+    using out_t = typename O::c_type;
 
     // If units are the same, zero copy, otherwise convert
     const auto& in_type = static_cast<const O&>(*input.type());
@@ -377,8 +378,8 @@ struct CastFunctor<O, I,
         kTimeConversionTable[static_cast<int>(in_type.unit())]
       [static_cast<int>(out_type.unit())];
 
-    ShiftTime<T>(ctx, options, conversion.first, static_cast<T>(conversion.second),
-                 input, output);
+    ShiftTime<in_t, out_t>(ctx, options, conversion.first, conversion.second,
+                           input, output);
   }
 };
 
@@ -737,9 +738,13 @@ class CastKernel : public UnaryKernel {
 
 #define DATE64_CASES(FN, IN_TYPE) FN(Date64Type, Date64Type);
 
-#define TIME32_CASES(FN, IN_TYPE) FN(Time32Type, Time32Type);
+#define TIME32_CASES(FN, IN_TYPE)               \
+  FN(Time32Type, Time32Type);                   \
+  FN(Time32Type, Time64Type);
 
-#define TIME64_CASES(FN, IN_TYPE) FN(Time64Type, Time64Type);
+#define TIME64_CASES(FN, IN_TYPE)               \
+  FN(Time64Type, Time32Type);                   \
+  FN(Time64Type, Time64Type);
 
 #define TIMESTAMP_CASES(FN, IN_TYPE) FN(TimestampType, TimestampType);
 
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index 4ead432c33d..dc9036db4f2 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -346,8 +346,8 @@ TEST_F(TestCast, TimestampToTimestamp) {
                             timestamp(TimeUnit::SECOND), options);
   CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v8, is_valid,
                             timestamp(TimeUnit::MILLI), options);
-  CheckFails<TimestampType>(timestamp(TimeUnit::MILLI), v8, is_valid,
-                            timestamp(TimeUnit::SECOND), options);
+  CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v8, is_valid,
+                            timestamp(TimeUnit::MICRO), options);
   CheckFails<TimestampType>(timestamp(TimeUnit::MICRO), v9, is_valid,
                             timestamp(TimeUnit::SECOND), options);
   CheckFails<TimestampType>(timestamp(TimeUnit::NANO), v9, is_valid,
@@ -357,6 +357,103 @@ TEST_F(TestCast, TimestampToTimestamp) {
 }
 
 
+TEST_F(TestCast, TimeToTime) {
+  CastOptions options;
+
+
+  vector<bool> is_valid = {true, false, true, true, true};
+
+  // Multiply promotions
+  vector<int32_t> v1 = {0, 100, 200, 1, 2};
+  vector<int32_t> e1 = {0, 100000, 200000, 1000, 2000};
+  CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
+      time32(TimeUnit::SECOND), v1, is_valid,
+      time32(TimeUnit::MILLI), e1, options);
+
+  vector<int32_t> v2 = {0, 100, 200, 1, 2};
+  vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
+  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
+      time32(TimeUnit::SECOND), v2, is_valid,
+      time64(TimeUnit::MICRO), e2, options);
+
+  vector<int32_t> v3 = {0, 100, 200, 1, 2};
+  vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
+  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
+      time32(TimeUnit::SECOND), v3, is_valid,
+      time64(TimeUnit::NANO), e3, options);
+
+  vector<int32_t> v4 = {0, 100, 200, 1, 2};
+  vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
+  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
+      time32(TimeUnit::MILLI), v4, is_valid,
+      time64(TimeUnit::MICRO), e4, options);
+
+  vector<int32_t> v5 = {0, 100, 200, 1, 2};
+  vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
+  CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
+      time32(TimeUnit::MILLI), v5, is_valid,
+      time64(TimeUnit::NANO), e5, options);
+
+  vector<int64_t> v6 = {0, 100, 200, 1, 2};
+  vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
+  CheckCase<Time64Type, int64_t, Time64Type, int64_t>(
+      time64(TimeUnit::MICRO), v6, is_valid,
+      time64(TimeUnit::NANO), e6, options);
+
+  // Zero copy
+  std::shared_ptr<Array> arr;
+  vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
+  ArrayFromVector<Time64Type, int64_t>(time64(TimeUnit::MICRO), is_valid, v7, &arr);
+  CheckZeroCopy(*arr, time64(TimeUnit::MICRO));
+
+  // Divide, truncate
+  vector<int32_t> v8 = {0, 100123, 200456, 1123, 2456};
+  vector<int32_t> e8 = {0, 100, 200, 1, 2};
+
+  options.allow_time_truncate = true;
+  CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
+      time32(TimeUnit::MILLI), v8, is_valid,
+      time32(TimeUnit::SECOND), e8, options);
+  CheckCase<Time64Type, int32_t, Time32Type, int32_t>(
+      time64(TimeUnit::MICRO), v8, is_valid,
+      time32(TimeUnit::MILLI), e8, options);
+  CheckCase<Time64Type, int32_t, Time64Type, int32_t>(
+      time64(TimeUnit::NANO), v8, is_valid,
+      time64(TimeUnit::MICRO), e8, options);
+
+  vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
+  vector<int32_t> e9 = {0, 100, 200, 1, 2};
+  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
+      time64(TimeUnit::MICRO), v9, is_valid,
+      time32(TimeUnit::SECOND), e9, options);
+  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
+      time64(TimeUnit::NANO), v9, is_valid,
+      time32(TimeUnit::MILLI), e9, options);
+
+  vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
+  vector<int32_t> e10 = {0, 100, 200, 1, 2};
+  CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
+      time64(TimeUnit::NANO), v10, is_valid,
+      time32(TimeUnit::SECOND), e10, options);
+
+  // Disallow truncate, failures
+
+  options.allow_time_truncate = false;
+  CheckFails<Time32Type>(time32(TimeUnit::MILLI), v8, is_valid,
+                         time32(TimeUnit::SECOND), options);
+  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v8, is_valid,
+                         time32(TimeUnit::MILLI), options);
+  CheckFails<Time64Type>(time64(TimeUnit::NANO), v8, is_valid,
+                         time64(TimeUnit::MICRO), options);
+  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v9, is_valid,
+                         time32(TimeUnit::SECOND), options);
+  CheckFails<Time64Type>(time64(TimeUnit::NANO), v9, is_valid,
+                         time32(TimeUnit::MILLI), options);
+  CheckFails<Time64Type>(time64(TimeUnit::NANO), v10, is_valid,
+                         time32(TimeUnit::SECOND), options);
+}
+
+
 TEST_F(TestCast, ToDouble) {
   CastOptions options;
   vector<bool> is_valid = {true, false, true, true, true};

From ac9845e330c0f0ac0864c6b4fcb367e2eb86e2c2 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 23:48:40 -0400
Subject: [PATCH 08/11] Implement date32<->date64 casts, unit tests

Change-Id: Iedc2f14ec80146fa808c9935368f266cb06138a5
---
 cpp/src/arrow/compute/cast.cc         |  57 +++++++------
 cpp/src/arrow/compute/compute-test.cc | 110 +++++++++++++++-----------
 2 files changed, 98 insertions(+), 69 deletions(-)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index c462d05a9aa..60c8f100cc6 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -289,8 +289,8 @@ struct CastFunctor<O, I,
 
 template <typename in_type, typename out_type>
 inline void ShiftTime(FunctionContext* ctx, const CastOptions& options,
-                      const bool is_multiply, const int64_t factor,
-                      const Array& input, ArrayData* output) {
+                      const bool is_multiply, const int64_t factor, const Array& input,
+                      ArrayData* output) {
   const in_type* in_data = GetValuesAs<in_type>(*input.data(), 1);
   auto out_data = reinterpret_cast<out_type*>(output->buffers[1]->mutable_data());
 
@@ -308,9 +308,8 @@ inline void ShiftTime(FunctionContext* ctx, const CastOptions& options,
         out_data[i] = static_cast<out_type>(in_data[i] / factor);
         if (input.IsValid(i) && (out_data[i] * factor != in_data[i])) {
           std::stringstream ss;
-          ss << "Casting from " << input.type()->ToString()
-             << " to " << output->type->ToString()
-             << " would lose data: " << in_data[i];
+          ss << "Casting from " << input.type()->ToString() << " to "
+             << output->type->ToString() << " would lose data: " << in_data[i];
           ctx->SetStatus(Status::Invalid(ss.str()));
           break;
         }
@@ -323,10 +322,10 @@ namespace {
 
 // {is_multiply, factor}
 const std::pair<bool, int64_t> kTimeConversionTable[4][4] = {
-  {{true, 1}, {true, 1000}, {true, 1000000}, {true, 1000000000L}},     // SECOND
-  {{false, 1000}, {true, 1}, {true, 1000}, {true, 1000000}},           // MILLI
-  {{false, 1000000}, {false, 1000}, {true, 1}, {true, 1000}},          // MICRO
-  {{false, 1000000000L}, {false, 1000000}, {false, 1000}, {true, 1}},  // NANO
+    {{true, 1}, {true, 1000}, {true, 1000000}, {true, 1000000000L}},     // SECOND
+    {{false, 1000}, {true, 1}, {true, 1000}, {true, 1000000}},           // MILLI
+    {{false, 1000000}, {false, 1000}, {true, 1}, {true, 1000}},          // MICRO
+    {{false, 1000000000L}, {false, 1000000}, {false, 1000}, {true, 1}},  // NANO
 };
 
 }  // namespace
@@ -346,10 +345,10 @@ struct CastFunctor<TimestampType, TimestampType> {
 
     std::pair<bool, int64_t> conversion =
         kTimeConversionTable[static_cast<int>(in_type.unit())]
-      [static_cast<int>(out_type.unit())];
+                            [static_cast<int>(out_type.unit())];
 
-    ShiftTime<int64_t, int64_t>(ctx, options, conversion.first, conversion.second,
-                                input, output);
+    ShiftTime<int64_t, int64_t>(ctx, options, conversion.first, conversion.second, input,
+                                output);
   }
 };
 
@@ -366,7 +365,7 @@ struct CastFunctor<O, I,
     using out_t = typename O::c_type;
 
     // If units are the same, zero copy, otherwise convert
-    const auto& in_type = static_cast<const O&>(*input.type());
+    const auto& in_type = static_cast<const I&>(*input.type());
     const auto& out_type = static_cast<const O&>(*output->type);
 
     if (in_type.unit() == out_type.unit()) {
@@ -376,26 +375,32 @@ struct CastFunctor<O, I,
 
     std::pair<bool, int64_t> conversion =
         kTimeConversionTable[static_cast<int>(in_type.unit())]
-      [static_cast<int>(out_type.unit())];
+                            [static_cast<int>(out_type.unit())];
 
-    ShiftTime<in_t, out_t>(ctx, options, conversion.first, conversion.second,
-                           input, output);
+    ShiftTime<in_t, out_t>(ctx, options, conversion.first, conversion.second, input,
+                           output);
   }
 };
 
 // ----------------------------------------------------------------------
 // Between date32 and date64
 
+constexpr int64_t kMillisecondsInDay = 86400000;
+
 template <>
 struct CastFunctor<Date64Type, Date32Type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
-                  ArrayData* output) {}
+                  ArrayData* output) {
+    ShiftTime<int32_t, int64_t>(ctx, options, true, kMillisecondsInDay, input, output);
+  }
 };
 
 template <>
 struct CastFunctor<Date32Type, Date64Type> {
   void operator()(FunctionContext* ctx, const CastOptions& options, const Array& input,
-                  ArrayData* output) {}
+                  ArrayData* output) {
+    ShiftTime<int64_t, int32_t>(ctx, options, false, kMillisecondsInDay, input, output);
+  }
 };
 
 // ----------------------------------------------------------------------
@@ -734,16 +739,20 @@ class CastKernel : public UnaryKernel {
   FN(Int64Type, Time64Type);     \
   FN(Int64Type, Date64Type);
 
-#define DATE32_CASES(FN, IN_TYPE) FN(Date32Type, Date32Type);
+#define DATE32_CASES(FN, IN_TYPE) \
+  FN(Date32Type, Date32Type);     \
+  FN(Date32Type, Date64Type);
 
-#define DATE64_CASES(FN, IN_TYPE) FN(Date64Type, Date64Type);
+#define DATE64_CASES(FN, IN_TYPE) \
+  FN(Date64Type, Date64Type);     \
+  FN(Date64Type, Date32Type);
 
-#define TIME32_CASES(FN, IN_TYPE)               \
-  FN(Time32Type, Time32Type);                   \
+#define TIME32_CASES(FN, IN_TYPE) \
+  FN(Time32Type, Time32Type);     \
   FN(Time32Type, Time64Type);
 
-#define TIME64_CASES(FN, IN_TYPE)               \
-  FN(Time64Type, Time32Type);                   \
+#define TIME64_CASES(FN, IN_TYPE) \
+  FN(Time64Type, Time32Type);     \
   FN(Time64Type, Time64Type);
 
 #define TIMESTAMP_CASES(FN, IN_TYPE) FN(TimestampType, TimestampType);
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index dc9036db4f2..8a7ef923b47 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -278,11 +278,10 @@ TEST_F(TestCast, ToIntDowncastUnsafe) {
 TEST_F(TestCast, TimestampToTimestamp) {
   CastOptions options;
 
-  auto CheckTimestampCast = [this](const CastOptions& options,
-                                   TimeUnit::type from_unit, TimeUnit::type to_unit,
-                                   const std::vector<int64_t>& from_values,
-                                   const std::vector<int64_t>& to_values,
-                                   const std::vector<bool>& is_valid) {
+  auto CheckTimestampCast = [this](
+      const CastOptions& options, TimeUnit::type from_unit, TimeUnit::type to_unit,
+      const std::vector<int64_t>& from_values, const std::vector<int64_t>& to_values,
+      const std::vector<bool>& is_valid) {
     CheckCase<TimestampType, int64_t, TimestampType, int64_t>(
         timestamp(from_unit), from_values, is_valid, timestamp(to_unit), to_values,
         options);
@@ -318,8 +317,8 @@ TEST_F(TestCast, TimestampToTimestamp) {
   // Zero copy
   std::shared_ptr<Array> arr;
   vector<int64_t> v7 = {0, 70000, 2000, 1000, 0};
-  ArrayFromVector<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), is_valid,
-                                          v7, &arr);
+  ArrayFromVector<TimestampType, int64_t>(timestamp(TimeUnit::SECOND), is_valid, v7,
+                                          &arr);
   CheckZeroCopy(*arr, timestamp(TimeUnit::SECOND));
 
   // Divide, truncate
@@ -356,49 +355,41 @@ TEST_F(TestCast, TimestampToTimestamp) {
                             timestamp(TimeUnit::SECOND), options);
 }
 
-
 TEST_F(TestCast, TimeToTime) {
   CastOptions options;
 
-
   vector<bool> is_valid = {true, false, true, true, true};
 
   // Multiply promotions
   vector<int32_t> v1 = {0, 100, 200, 1, 2};
   vector<int32_t> e1 = {0, 100000, 200000, 1000, 2000};
   CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
-      time32(TimeUnit::SECOND), v1, is_valid,
-      time32(TimeUnit::MILLI), e1, options);
+      time32(TimeUnit::SECOND), v1, is_valid, time32(TimeUnit::MILLI), e1, options);
 
   vector<int32_t> v2 = {0, 100, 200, 1, 2};
   vector<int64_t> e2 = {0, 100000000L, 200000000L, 1000000, 2000000};
   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::SECOND), v2, is_valid,
-      time64(TimeUnit::MICRO), e2, options);
+      time32(TimeUnit::SECOND), v2, is_valid, time64(TimeUnit::MICRO), e2, options);
 
   vector<int32_t> v3 = {0, 100, 200, 1, 2};
   vector<int64_t> e3 = {0, 100000000000L, 200000000000L, 1000000000L, 2000000000L};
   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::SECOND), v3, is_valid,
-      time64(TimeUnit::NANO), e3, options);
+      time32(TimeUnit::SECOND), v3, is_valid, time64(TimeUnit::NANO), e3, options);
 
   vector<int32_t> v4 = {0, 100, 200, 1, 2};
   vector<int64_t> e4 = {0, 100000, 200000, 1000, 2000};
   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::MILLI), v4, is_valid,
-      time64(TimeUnit::MICRO), e4, options);
+      time32(TimeUnit::MILLI), v4, is_valid, time64(TimeUnit::MICRO), e4, options);
 
   vector<int32_t> v5 = {0, 100, 200, 1, 2};
   vector<int64_t> e5 = {0, 100000000L, 200000000L, 1000000, 2000000};
   CheckCase<Time32Type, int32_t, Time64Type, int64_t>(
-      time32(TimeUnit::MILLI), v5, is_valid,
-      time64(TimeUnit::NANO), e5, options);
+      time32(TimeUnit::MILLI), v5, is_valid, time64(TimeUnit::NANO), e5, options);
 
   vector<int64_t> v6 = {0, 100, 200, 1, 2};
   vector<int64_t> e6 = {0, 100000, 200000, 1000, 2000};
   CheckCase<Time64Type, int64_t, Time64Type, int64_t>(
-      time64(TimeUnit::MICRO), v6, is_valid,
-      time64(TimeUnit::NANO), e6, options);
+      time64(TimeUnit::MICRO), v6, is_valid, time64(TimeUnit::NANO), e6, options);
 
   // Zero copy
   std::shared_ptr<Array> arr;
@@ -412,47 +403,76 @@ TEST_F(TestCast, TimeToTime) {
 
   options.allow_time_truncate = true;
   CheckCase<Time32Type, int32_t, Time32Type, int32_t>(
-      time32(TimeUnit::MILLI), v8, is_valid,
-      time32(TimeUnit::SECOND), e8, options);
+      time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND), e8, options);
   CheckCase<Time64Type, int32_t, Time32Type, int32_t>(
-      time64(TimeUnit::MICRO), v8, is_valid,
-      time32(TimeUnit::MILLI), e8, options);
+      time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI), e8, options);
   CheckCase<Time64Type, int32_t, Time64Type, int32_t>(
-      time64(TimeUnit::NANO), v8, is_valid,
-      time64(TimeUnit::MICRO), e8, options);
+      time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO), e8, options);
 
   vector<int64_t> v9 = {0, 100123000, 200456000, 1123000, 2456000};
   vector<int32_t> e9 = {0, 100, 200, 1, 2};
   CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
-      time64(TimeUnit::MICRO), v9, is_valid,
-      time32(TimeUnit::SECOND), e9, options);
+      time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND), e9, options);
   CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
-      time64(TimeUnit::NANO), v9, is_valid,
-      time32(TimeUnit::MILLI), e9, options);
+      time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI), e9, options);
 
   vector<int64_t> v10 = {0, 100123000000L, 200456000000L, 1123000000L, 2456000000};
   vector<int32_t> e10 = {0, 100, 200, 1, 2};
   CheckCase<Time64Type, int64_t, Time32Type, int32_t>(
-      time64(TimeUnit::NANO), v10, is_valid,
-      time32(TimeUnit::SECOND), e10, options);
+      time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND), e10, options);
 
   // Disallow truncate, failures
 
   options.allow_time_truncate = false;
-  CheckFails<Time32Type>(time32(TimeUnit::MILLI), v8, is_valid,
-                         time32(TimeUnit::SECOND), options);
-  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v8, is_valid,
-                         time32(TimeUnit::MILLI), options);
-  CheckFails<Time64Type>(time64(TimeUnit::NANO), v8, is_valid,
-                         time64(TimeUnit::MICRO), options);
-  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v9, is_valid,
-                         time32(TimeUnit::SECOND), options);
-  CheckFails<Time64Type>(time64(TimeUnit::NANO), v9, is_valid,
-                         time32(TimeUnit::MILLI), options);
-  CheckFails<Time64Type>(time64(TimeUnit::NANO), v10, is_valid,
-                         time32(TimeUnit::SECOND), options);
+  CheckFails<Time32Type>(time32(TimeUnit::MILLI), v8, is_valid, time32(TimeUnit::SECOND),
+                         options);
+  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v8, is_valid, time32(TimeUnit::MILLI),
+                         options);
+  CheckFails<Time64Type>(time64(TimeUnit::NANO), v8, is_valid, time64(TimeUnit::MICRO),
+                         options);
+  CheckFails<Time64Type>(time64(TimeUnit::MICRO), v9, is_valid, time32(TimeUnit::SECOND),
+                         options);
+  CheckFails<Time64Type>(time64(TimeUnit::NANO), v9, is_valid, time32(TimeUnit::MILLI),
+                         options);
+  CheckFails<Time64Type>(time64(TimeUnit::NANO), v10, is_valid, time32(TimeUnit::SECOND),
+                         options);
 }
 
+TEST_F(TestCast, DateToDate) {
+  CastOptions options;
+
+  vector<bool> is_valid = {true, false, true, true, true};
+
+  constexpr int64_t F = 86400000;
+
+  // Multiply promotion
+  vector<int32_t> v1 = {0, 100, 200, 1, 2};
+  vector<int64_t> e1 = {0, 100 * F, 200 * F, F, 2 * F};
+  CheckCase<Date32Type, int32_t, Date64Type, int64_t>(date32(), v1, is_valid, date64(),
+                                                      e1, options);
+
+  // Zero copy
+  std::shared_ptr<Array> arr;
+  vector<int32_t> v2 = {0, 70000, 2000, 1000, 0};
+  vector<int64_t> v3 = {0, 70000, 2000, 1000, 0};
+  ArrayFromVector<Date32Type, int32_t>(date32(), is_valid, v2, &arr);
+  CheckZeroCopy(*arr, date32());
+
+  ArrayFromVector<Date64Type, int64_t>(date64(), is_valid, v3, &arr);
+  CheckZeroCopy(*arr, date64());
+
+  // Divide, truncate
+  vector<int64_t> v8 = {0, 100 * F + 123, 200 * F + 456, F + 123, 2 * F + 456};
+  vector<int32_t> e8 = {0, 100, 200, 1, 2};
+
+  options.allow_time_truncate = true;
+  CheckCase<Date64Type, int64_t, Date32Type, int32_t>(date64(), v8, is_valid, date32(),
+                                                      e8, options);
+
+  // Disallow truncate, failures
+  options.allow_time_truncate = false;
+  CheckFails<Date64Type>(date64(), v8, is_valid, date32(), options);
+}
 
 TEST_F(TestCast, ToDouble) {
   CastOptions options;

From a820ed9029a006c64d198b89be39f2589376fa52 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 23:55:53 -0400
Subject: [PATCH 09/11] Add test case for ARROW-1680

Change-Id: I7a248251372c4cb86871b4c569f064de4c250670
---
 python/pyarrow/tests/test_array.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 418076f8196..dfb66d663a6 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -290,6 +290,21 @@ def test_cast_integers_unsafe():
         _check_cast_case(case, safe=False)
 
 
+def test_cast_timestamp_unit():
+    # ARROW-1680
+    val = datetime.datetime.now()
+    s = pd.Series([val])
+    s_nyc = s.dt.tz_localize('tzlocal()').dt.tz_convert('America/New_York')
+
+    us_with_tz = pa.timestamp('us', tz='America/New_York')
+    arr = pa.Array.from_pandas(s_nyc, type=us_with_tz)
+
+    arr2 = pa.Array.from_pandas(s, type=pa.timestamp('us'))
+
+    assert arr[0].as_py() == s_nyc[0]
+    assert arr2[0].as_py() == s[0]
+
+
 def test_cast_signed_to_unsigned():
     safe_cases = [
         (np.array([0, 1, 2, 3], dtype='i1'), pa.uint8(),

From 1d28ce27ad888526f7b79d229297817d2f097cff Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Tue, 24 Oct 2017 23:58:19 -0400
Subject: [PATCH 10/11] Fix typo

---
 cpp/src/arrow/compute/cast.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 60c8f100cc6..68a2b12379e 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -71,7 +71,7 @@ namespace compute {
 
 template <typename T>
 inline const T* GetValuesAs(const ArrayData& data, int i) {
-  return reinterpret_cast<const T*>(data.buffers[1]->data()) + data.offset;
+  return reinterpret_cast<const T*>(data.buffers[i]->data()) + data.offset;
 }
 
 namespace {
@@ -316,7 +316,7 @@ inline void ShiftTime(FunctionContext* ctx, const CastOptions& options,
       }
     }
   }
-}  // namespace
+}
 
 namespace {
 

From 6f6107fc779bf3463461ae74e9a155c78caecdf2 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Wed, 25 Oct 2017 14:13:24 -0400
Subject: [PATCH 11/11] Test that timestamp truncation raises for safe casts

Change-Id: Ie55185196ac7d417ad31892eae14e28d0cab516f
---
 python/pyarrow/array.pxi             |  4 ++--
 python/pyarrow/includes/libarrow.pxd |  1 +
 python/pyarrow/tests/test_array.py   | 11 +++++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index c596d2ad8e7..72262f0c981 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -260,8 +260,8 @@ cdef class Array:
 
         type = _ensure_type(target_type)
 
-        if not safe:
-            options.allow_int_overflow = 1
+        options.allow_int_overflow = not safe
+        options.allow_time_truncate = not safe
 
         with nogil:
             check_status(Cast(_context(), self.ap[0], type.sp_type,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 0e5d4a8eddc..809bb96b7a4 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -747,6 +747,7 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
 
     cdef cppclass CCastOptions" arrow::compute::CastOptions":
         c_bool allow_int_overflow
+        c_bool allow_time_truncate
 
     CStatus Cast(CFunctionContext* context, const CArray& array,
                  const shared_ptr[CDataType]& to_type,
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index dfb66d663a6..e3a4c97567e 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -304,6 +304,17 @@ def test_cast_timestamp_unit():
     assert arr[0].as_py() == s_nyc[0]
     assert arr2[0].as_py() == s[0]
 
+    # Disallow truncation
+    arr = pa.array([123123], type='int64').cast(pa.timestamp('ms'))
+    expected = pa.array([123], type='int64').cast(pa.timestamp('s'))
+
+    target = pa.timestamp('s')
+    with pytest.raises(ValueError):
+        arr.cast(target)
+
+    result = arr.cast(target, safe=False)
+    assert result.equals(expected)
+
 
 def test_cast_signed_to_unsigned():
     safe_cases = [