From da96a382cb9b4bb68d87cfb3a9c3b0d888bbfe68 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Mon, 24 Apr 2017 00:16:36 -0400
Subject: [PATCH 1/4] Add failing Parquet test case. Enable same-type-size
 cases in pandas_convert.cc

Change-Id: I8b88d398e08c921b7990b96ffffbc40f58ac6938
---
 cpp/src/arrow/python/pandas_convert.cc |  2 +-
 cpp/src/arrow/python/type_traits.h     | 48 ++++++++++++++++++++++++++
 python/pyarrow/tests/test_ipc.py       |  3 +-
 python/pyarrow/tests/test_parquet.py   | 34 ++++++++++++++++++
 4 files changed, 85 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index 636a3fd15c0..9f65af41bb2 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -444,7 +444,7 @@ inline Status PandasConverter::ConvertData(std::shared_ptr<Buffer>* data) {
   // Handle LONGLONG->INT64 and other fun things
   int type_num_compat = cast_npy_type_compat(PyArray_DESCR(arr_)->type_num);
 
-  if (traits::npy_type != type_num_compat) {
+  if (numpy_type_size(traits::npy_type) != numpy_type_size(type_num_compat)) {
     return Status::NotImplemented("NumPy type casts not yet implemented");
   }
 
diff --git a/cpp/src/arrow/python/type_traits.h b/cpp/src/arrow/python/type_traits.h
index 26b15bdc9f4..b6761ae0d26 100644
--- a/cpp/src/arrow/python/type_traits.h
+++ b/cpp/src/arrow/python/type_traits.h
@@ -15,6 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
+// Internal header
+
 #include "arrow/python/platform.h"
 
 #include <cstdint>
@@ -24,6 +26,7 @@
 
 #include "arrow/builder.h"
 #include "arrow/type.h"
+#include "arrow/util/logging.h"
 
 namespace arrow {
 namespace py {
@@ -224,5 +227,50 @@ struct arrow_traits<Type::BINARY> {
   static constexpr bool supports_nulls = true;
 };
 
+static inline int numpy_type_size(int npy_type) {
+  switch (npy_type) {
+    case NPY_BOOL:
+      return 1;
+    case NPY_INT8:
+      return 1;
+    case NPY_INT16:
+      return 2;
+    case NPY_INT32:
+      return 4;
+    case NPY_INT64:
+      return 8;
+#if (NPY_INT64 != NPY_LONGLONG)
+    case NPY_LONGLONG:
+      return 8;
+#endif
+    case NPY_UINT8:
+      return 1;
+    case NPY_UINT16:
+      return 2;
+    case NPY_UINT32:
+      return 4;
+    case NPY_UINT64:
+      return 8;
+#if (NPY_UINT64 != NPY_ULONGLONG)
+    case NPY_ULONGLONG:
+      return 8;
+#endif
+    case NPY_FLOAT16:
+      return 2;
+    case NPY_FLOAT32:
+      return 4;
+    case NPY_FLOAT64:
+      return 8;
+    case NPY_DATETIME:
+      return 8;
+    case NPY_OBJECT:
+      return sizeof(void*);
+    default:
+      DCHECK(false) << "unhandled numpy type";
+      break;
+  }
+  return -1;
+}
+
 }  // namespace py
 }  // namespace arrow
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 81213ede315..02040678958 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -158,7 +158,8 @@ def run(self):
                 connection.close()
 
         def get_result(self):
-            return(self._schema, self._table if self._do_read_all else self._batches)
+            return(self._schema, self._table if self._do_read_all
+                   else self._batches)
 
     def setUp(self):
         # NOTE: must start and stop server in test
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 268e87af7dd..5a6f9fcb4c2 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -348,6 +348,40 @@ def test_column_of_lists(tmpdir):
     tm.assert_frame_equal(df, df_read)
 
 
+@parquet
+def test_date_time_types(tmpdir):
+    buf = io.BytesIO()
+
+    t1 = pa.date32()
+    data1 = np.array([17259, 17260, 17261], dtype='int32')
+    a1 = pa.Array.from_pandas(data1, type=t1)
+
+    t2 = pa.date64()
+    data2 = data1.astype('int64') * 86400000
+    a2 = pa.Array.from_pandas(data2, type=t2)
+
+    t3 = pa.timestamp('us')
+    start = pd.Timestamp('2000-01-01').value / 1000
+    data3 = np.array([start, start + 1, start + 2], dtype='int64')
+    a3 = pa.Array.from_pandas(data3, type=t3)
+
+    t4 = pa.time32('s')
+    data4 = np.arange(3, dtype='i4')
+    a4 = pa.Array.from_pandas(data4, type=t4)
+
+    t5 = pa.time64('us')
+    a5 = pa.Array.from_pandas(data4.astype('int64'), type=t5)
+
+    table = pa.Table.from_arrays([a1, a2, a3, a4, a5],
+                                 ['date32', 'date64', 'timestamp[us]',
+                                  'time32[s]', 'time32[us]'])
+    pq.write_table(table, buf, version="2.0")
+    buf.seek(0)
+
+    result = pq.read_table(buf)
+    assert result.equals(table)
+
+
 @parquet
 def test_multithreaded_read():
     df = alltypes_sample(size=10000)

From fad3934ae774326cfcf78d7a370d82372cca32bb Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Mon, 24 Apr 2017 08:29:58 -0400
Subject: [PATCH 2/4] Update test case

Change-Id: Ie32591cd8f4fd36f5572d535926fd0ab48b14271
---
 python/pyarrow/tests/test_parquet.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 5a6f9fcb4c2..a285b27b1b1 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -374,12 +374,18 @@ def test_date_time_types(tmpdir):
 
     table = pa.Table.from_arrays([a1, a2, a3, a4, a5],
                                  ['date32', 'date64', 'timestamp[us]',
-                                  'time32[s]', 'time32[us]'])
+                                  'time32[s]', 'time64[us]'])
+
+    # date64 as date32
+    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5],
+                                    ['date32', 'date64', 'timestamp[us]',
+                                     'time32[s]', 'time64[us]'])
+
     pq.write_table(table, buf, version="2.0")
     buf.seek(0)
 
     result = pq.read_table(buf)
-    assert result.equals(table)
+    assert result.equals(expected)
 
 
 @parquet

From 475fa3f04df7ebe5ca7a13135e57960040fa14b8 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Mon, 24 Apr 2017 19:42:16 -0400
Subject: [PATCH 3/4] Fix test case

Change-Id: I2c5c3ecacc50b6faa63e5241119cf0b6914ae864
---
 cpp/src/arrow/util/stl.h             | 2 +-
 python/pyarrow/tests/test_parquet.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/util/stl.h b/cpp/src/arrow/util/stl.h
index bfce111ff8a..d58689b7488 100644
--- a/cpp/src/arrow/util/stl.h
+++ b/cpp/src/arrow/util/stl.h
@@ -20,7 +20,7 @@
 
 #include <vector>
 
-#include <arrow/util/logging.h>
+#include "arrow/util/logging.h"
 
 namespace arrow {
 
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index a285b27b1b1..9b6bd356f88 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -365,7 +365,7 @@ def test_date_time_types(tmpdir):
     data3 = np.array([start, start + 1, start + 2], dtype='int64')
     a3 = pa.Array.from_pandas(data3, type=t3)
 
-    t4 = pa.time32('s')
+    t4 = pa.time32('ms')
     data4 = np.arange(3, dtype='i4')
     a4 = pa.Array.from_pandas(data4, type=t4)
 

From db169409aebb964e329f48b1c02df3e5905554ea Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Mon, 24 Apr 2017 20:23:35 -0400
Subject: [PATCH 4/4] Add tests for auto-casted types, and unsupported
 nanosecond time

Change-Id: Ib95a2c36482e8f95f2ca21c48c2f66c35e87d837
---
 python/pyarrow/tests/test_parquet.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 9b6bd356f88..8c446af03fc 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -372,14 +372,21 @@ def test_date_time_types(tmpdir):
     t5 = pa.time64('us')
     a5 = pa.Array.from_pandas(data4.astype('int64'), type=t5)
 
-    table = pa.Table.from_arrays([a1, a2, a3, a4, a5],
+    t6 = pa.time32('s')
+    a6 = pa.Array.from_pandas(data4, type=t6)
+
+    ex_t6 = pa.time32('ms')
+    ex_a6 = pa.Array.from_pandas(data4 * 1000, type=ex_t6)
+
+    table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6],
                                  ['date32', 'date64', 'timestamp[us]',
-                                  'time32[s]', 'time64[us]'])
+                                  'time32[s]', 'time64[us]', 'time32[s]'])
 
     # date64 as date32
-    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5],
+    # time32[s] to time32[ms]
+    expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6],
                                     ['date32', 'date64', 'timestamp[us]',
-                                     'time32[s]', 'time64[us]'])
+                                     'time32[s]', 'time64[us]', 'time32[s]'])
 
     pq.write_table(table, buf, version="2.0")
     buf.seek(0)
@@ -387,6 +394,19 @@ def test_date_time_types(tmpdir):
     result = pq.read_table(buf)
     assert result.equals(expected)
 
+    # Unsupported stuff
+    def _assert_unsupported(array):
+        table = pa.Table.from_arrays([array], ['unsupported'])
+        buf = io.BytesIO()
+
+        with pytest.raises(NotImplementedError):
+            pq.write_table(table, buf, version="2.0")
+
+    t7 = pa.time64('ns')
+    a7 = pa.Array.from_pandas(data4.astype('int64'), type=t7)
+
+    _assert_unsupported(a7)
+
 
 @parquet
 def test_multithreaded_read():