From f5cbf8f8fbc7ddc2bd47f13b6548edd8d5e2581c Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Wed, 30 Sep 2020 00:23:36 -0700
Subject: [PATCH 1/9] Add test for pandas conversion

---
 python/pyarrow/tests/test_pandas.py | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 6eae419f05a..93803ebb3e9 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2132,6 +2132,16 @@ def test_auto_chunking_on_list_overflow(self):
         assert len(column_a.chunk(0)) == 2**24 - 1
         assert len(column_a.chunk(1)) == 1
 
+    def test_map_array(self):
+        data = [[(b'a', 1), (b'b', 2)],
+                None,
+                [(b'd', 4), (b'e', 5), (b'f', None)],
+                [(b'g', 7)]]
+
+        arr = pa.array(data, type=pa.map_(pa.binary(), pa.int32()))
+        s = arr.to_pandas()
+        tm.assert_series_equal(s, pd.Series(data), check_names=False)
+
 
 class TestConvertStructTypes:
     """

From 6c8558a71895f37a3b1c1ef0f8601aae1fe41ae9 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Wed, 30 Sep 2020 00:24:07 -0700
Subject: [PATCH 2/9] Started pandas map converter

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 85 ++++++++++++++++++++++++-
 1 file changed, 84 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index e4d3e793d25..1951d0f19ab 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -791,6 +791,84 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
   return Status::OK();
 }
 
+
+inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
+                         PyObject** out_values) {
+  // Get column of underlying map arrays
+  std::vector<std::shared_ptr<Array>> map_arrays;
+  for (int c = 0; c < data.num_chunks(); c++) {
+    const auto& arr = checked_cast<const MapArray&>(*data.chunk(c));
+    map_arrays.emplace_back(arr.values());
+  }
+  const auto& map_type = checked_cast<const MapType&>(*data.type());
+  auto entry_type = map_type.value_type();
+  const auto& key_type = map_type.key_type();
+  /*using ListArrayType = typename ListArrayT::TypeClass;
+  const auto& map_type = checked_cast<const ListArrayType&>(*data.type());
+  auto value_type = list_type.value_type();
+
+  if (value_type->id() == Type::DICTIONARY) {
+    // ARROW-6899: Convert dictionary-encoded children to dense instead of
+    // failing below. A more efficient conversion than this could be done later
+    auto dense_type = checked_cast<const DictionaryType&>(*value_type).value_type();
+    RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &value_arrays));
+    value_type = dense_type;
+  }*/
+
+  auto flat_column = std::make_shared<ChunkedArray>(map_arrays, entry_type);
+  // TODO(ARROW-489): Currently we don't have a Python reference for single columns.
+  //    Storing a reference to the whole Array would be too expensive.
+
+  // ARROW-3789(wesm): During refactoring I found that unit tests assumed that
+  // timestamp units would be preserved on list<timestamp UNIT> conversions in
+  // Table.to_pandas. So we set the option here to not coerce things to
+  // nanoseconds. Bit of a hack but this seemed the simplest thing to satisfy
+  // the existing unit tests
+  PandasOptions modified_options = options;
+  modified_options.coerce_temporal_nanoseconds = false;
+
+  OwnedRefNoGIL owned_numpy_array;
+  RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_column, nullptr,
+                                            owned_numpy_array.ref()));
+
+  PyObject* numpy_array = owned_numpy_array.obj();
+
+  int64_t chunk_offset = 0;
+  for (int c = 0; c < data.num_chunks(); c++) {
+    auto arr = std::static_pointer_cast<MapArray>(data.chunk(c));
+
+    /*const bool has_nulls = data.null_count() > 0;
+    for (int64_t i = 0; i < arr->length(); ++i) {
+      if (has_nulls && arr->IsNull(i)) {
+        Py_INCREF(Py_None);
+        *out_values = Py_None;
+      } else {
+        OwnedRef start(PyLong_FromLongLong(arr->value_offset(i) + chunk_offset));
+        OwnedRef end(PyLong_FromLongLong(arr->value_offset(i + 1) + chunk_offset));
+        OwnedRef slice(PySlice_New(start.obj(), end.obj(), nullptr));
+
+        if (ARROW_PREDICT_FALSE(slice.obj() == nullptr)) {
+          // Fall out of loop, will return from RETURN_IF_PYERROR
+          break;
+        }
+        *out_values = PyObject_GetItem(numpy_array, slice.obj());
+
+        if (*out_values == nullptr) {
+          // Fall out of loop, will return from RETURN_IF_PYERROR
+          break;
+        }
+      }
+      ++out_values;
+    }*/
+    RETURN_IF_PYERROR();
+
+    chunk_offset += arr->values()->length();
+  }
+
+  return Status::OK();
+}
+
+
 template <typename InType, typename OutType>
 inline void ConvertNumericNullable(const ChunkedArray& data, InType na_value,
                                    OutType* out_values) {
@@ -1027,6 +1105,10 @@ struct ObjectWriterVisitor {
     return ConvertListsLike<ArrayType>(options, data, out_values);
   }
 
+  Status Visit(const MapType& type) {
+    return ConvertMap(options, data, out_values);
+  }
+
   Status Visit(const StructType& type) {
     return ConvertStruct(options, data, out_values);
   }
@@ -1801,7 +1883,8 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions&
     } break;
     case Type::FIXED_SIZE_LIST:
     case Type::LIST:
-    case Type::LARGE_LIST: {
+    case Type::LARGE_LIST: 
+    case Type::MAP: {
       auto list_type = std::static_pointer_cast<BaseListType>(data.type());
       if (!ListTypeSupported(*list_type->value_type())) {
         return Status::NotImplemented("Not implemented type for Arrow list to pandas: ",

From e85014ab43977e7b692dd294166f3251faa812e4 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Fri, 2 Oct 2020 16:45:52 -0700
Subject: [PATCH 3/9] Conversion working except when item has a NULL

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 126 +++++++++++++++++++-----
 python/pyarrow/tests/test_pandas.py     |   2 +
 2 files changed, 104 insertions(+), 24 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 1951d0f19ab..8b62a269a65 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -794,15 +794,20 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
 
 inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
                          PyObject** out_values) {
-  // Get column of underlying map arrays
-  std::vector<std::shared_ptr<Array>> map_arrays;
-  for (int c = 0; c < data.num_chunks(); c++) {
-    const auto& arr = checked_cast<const MapArray&>(*data.chunk(c));
-    map_arrays.emplace_back(arr.values());
+  std::cout << "*** CONVERT_MAP" << std::endl;
+  // Get columns of underlying key/item arrays
+  std::vector<std::shared_ptr<Array>> key_arrays;
+  std::vector<std::shared_ptr<Array>> item_arrays;
+  for (int c = 0; c < data.num_chunks(); ++c) {
+    const auto& map_arr = checked_cast<const MapArray&>(*data.chunk(c));
+    key_arrays.emplace_back(map_arr.keys());
+    item_arrays.emplace_back(map_arr.items());
   }
+
   const auto& map_type = checked_cast<const MapType&>(*data.type());
-  auto entry_type = map_type.value_type();
+  //auto entry_type = map_type.value_type();
   const auto& key_type = map_type.key_type();
+  const auto& item_type = map_type.item_type();
   /*using ListArrayType = typename ListArrayT::TypeClass;
   const auto& map_type = checked_cast<const ListArrayType&>(*data.type());
   auto value_type = list_type.value_type();
@@ -815,7 +820,8 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
     value_type = dense_type;
   }*/
 
-  auto flat_column = std::make_shared<ChunkedArray>(map_arrays, entry_type);
+  auto flat_keys = std::make_shared<ChunkedArray>(key_arrays, key_type);
+  auto flat_items = std::make_shared<ChunkedArray>(item_arrays, item_type);
   // TODO(ARROW-489): Currently we don't have a Python reference for single columns.
   //    Storing a reference to the whole Array would be too expensive.
 
@@ -827,39 +833,111 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   PandasOptions modified_options = options;
   modified_options.coerce_temporal_nanoseconds = false;
 
-  OwnedRefNoGIL owned_numpy_array;
-  RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_column, nullptr,
-                                            owned_numpy_array.ref()));
-
-  PyObject* numpy_array = owned_numpy_array.obj();
+  std::cout << "*** starting key item conversion" << std::endl;
+ 
+  OwnedRef list_item;
+  OwnedRef tuple_item;
+  OwnedRefNoGIL owned_numpy_keys;
+  RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_keys, nullptr,
+                                            owned_numpy_keys.ref()));
+  OwnedRefNoGIL owned_numpy_items;
+  RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_items, nullptr,
+                                            owned_numpy_items.ref()));
+  std::cout << "*** done key item conversion" << std::endl;
+  PyArrayObject* keys_array = reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
+  PyArrayObject* items_array = reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
+
+  std::cout << "*** starting chunk loop" << std::endl;
 
   int64_t chunk_offset = 0;
-  for (int c = 0; c < data.num_chunks(); c++) {
+  for (int c = 0; c < data.num_chunks(); ++c) {
     auto arr = std::static_pointer_cast<MapArray>(data.chunk(c));
+    const bool has_nulls = data.null_count() > 0;
 
-    /*const bool has_nulls = data.null_count() > 0;
+    // Make a list of key/item pairs for each row in array
     for (int64_t i = 0; i < arr->length(); ++i) {
       if (has_nulls && arr->IsNull(i)) {
         Py_INCREF(Py_None);
         *out_values = Py_None;
       } else {
-        OwnedRef start(PyLong_FromLongLong(arr->value_offset(i) + chunk_offset));
-        OwnedRef end(PyLong_FromLongLong(arr->value_offset(i + 1) + chunk_offset));
-        OwnedRef slice(PySlice_New(start.obj(), end.obj(), nullptr));
+        int64_t entry_offset = arr->value_offset(i);
+        int64_t num_maps = arr->value_offset(i + 1) - entry_offset;
+        OwnedRef key_value;
+        OwnedRef item_value;
+
+        std::cout << "*** row loop " << i << std::endl;
+        // Build the new list object for the row of maps
+        list_item.reset(PyList_New(0)); //num_maps));
+        RETURN_IF_PYERROR();
+        
+        // Add each key/item pair in the row
+        for (int64_t j = 0; j < num_maps; ++j) {
 
-        if (ARROW_PREDICT_FALSE(slice.obj() == nullptr)) {
-          // Fall out of loop, will return from RETURN_IF_PYERROR
-          break;
+          std::cout << "*** map loop " << j << std::endl;
+
+          // Build the new tuple object for key and item
+          tuple_item.reset(PyTuple_New(2));
+          RETURN_IF_PYERROR();
+
+          // Get key value, key is non-nullable for a valid row
+          auto ptr_key = reinterpret_cast<const char*>(
+              PyArray_GETPTR1(keys_array, chunk_offset + entry_offset + j));
+          key_value.reset(PyArray_GETITEM(keys_array, ptr_key));
+          RETURN_IF_PYERROR();
+
+          std::cout << "*** set key" << std::endl;
+          //PyObject* int_key = PyLong_FromLong(1L);
+          //PyTuple_SetItem(tuple_item.obj(), 0, int_key);
+          PyTuple_SetItem(tuple_item.obj(), 0, key_value.obj());
+          RETURN_IF_PYERROR();
+
+          std::cout << "*** item len: " << flat_items->length() << ", item index: " << (entry_offset + j) << std::endl;
+          
+          if (!item_arrays[c]->IsNull(entry_offset + j)) {
+            // Get valid value from item array
+            auto ptr_item = reinterpret_cast<const char*>(
+                PyArray_GETPTR1(items_array, chunk_offset + entry_offset + j));
+            item_value.reset(PyArray_GETITEM(items_array, ptr_item));
+            RETURN_IF_PYERROR();
+          } else {
+            std::cout << "*** item is null: " << entry_offset + j << std::endl;
+            // Translate the Null to a None
+            Py_INCREF(Py_None);
+            item_value.reset(Py_None);
+            std::cout << "*** item set to null" << std::endl;
+          }
+
+          std::cout << "*** set item" << std::endl;
+          //PyObject* int_item = PyLong_FromLong(2L);
+          //PyTuple_SetItem(tuple_item.obj(), 1, int_item);
+          PyTuple_SetItem(tuple_item.obj(), 1, item_value.obj());
+          RETURN_IF_PYERROR();
+
+          std::cout << "*** set list" << std::endl;
+          //Py_INCREF(tuple_item.obj());
+          // Add the key/item pair to the list for the row
+          PyList_Append(list_item.obj(), tuple_item.obj());
+          RETURN_IF_PYERROR();
+
+          std::cout << "*** end map loop" << std::endl;
         }
-        *out_values = PyObject_GetItem(numpy_array, slice.obj());
 
-        if (*out_values == nullptr) {
+
+        std::cout << "*** assign out_values" << std::endl;
+        
+        *out_values = list_item.obj();
+        // Grant ownership to the resulting array
+        Py_INCREF(*out_values);
+ 
+        /*if (*out_values == nullptr) {
           // Fall out of loop, will return from RETURN_IF_PYERROR
           break;
-        }
+        }*/
       }
       ++out_values;
-    }*/
+    }
+
+    std::cout << "*** end chunk loop" << std::endl;
     RETURN_IF_PYERROR();
 
     chunk_offset += arr->values()->length();
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 93803ebb3e9..e83a59b4a9e 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2138,6 +2138,8 @@ def test_map_array(self):
                 [(b'd', 4), (b'e', 5), (b'f', None)],
                 [(b'g', 7)]]
 
+        data = [[(b'a', 1)], [(b'b', 2)]]
+
         arr = pa.array(data, type=pa.map_(pa.binary(), pa.int32()))
         s = arr.to_pandas()
         tm.assert_series_equal(s, pd.Series(data), check_names=False)

From c5feb6ca5558d17d170fb26c245bddb2ae713e56 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Sat, 3 Oct 2020 22:34:46 -0700
Subject: [PATCH 4/9] Fixed setting tuples with nulls

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 63 ++++---------------------
 1 file changed, 10 insertions(+), 53 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 8b62a269a65..dafd29711db 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -794,7 +794,6 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
 
 inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
                          PyObject** out_values) {
-  std::cout << "*** CONVERT_MAP" << std::endl;
   // Get columns of underlying key/item arrays
   std::vector<std::shared_ptr<Array>> key_arrays;
   std::vector<std::shared_ptr<Array>> item_arrays;
@@ -833,22 +832,18 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   PandasOptions modified_options = options;
   modified_options.coerce_temporal_nanoseconds = false;
 
-  std::cout << "*** starting key item conversion" << std::endl;
- 
   OwnedRef list_item;
-  OwnedRef tuple_item;
+  OwnedRef key_value;
+  OwnedRef item_value;
   OwnedRefNoGIL owned_numpy_keys;
   RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_keys, nullptr,
                                             owned_numpy_keys.ref()));
   OwnedRefNoGIL owned_numpy_items;
   RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_items, nullptr,
                                             owned_numpy_items.ref()));
-  std::cout << "*** done key item conversion" << std::endl;
   PyArrayObject* keys_array = reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
   PyArrayObject* items_array = reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
 
-  std::cout << "*** starting chunk loop" << std::endl;
-
   int64_t chunk_offset = 0;
   for (int c = 0; c < data.num_chunks(); ++c) {
     auto arr = std::static_pointer_cast<MapArray>(data.chunk(c));
@@ -862,82 +857,44 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
       } else {
         int64_t entry_offset = arr->value_offset(i);
         int64_t num_maps = arr->value_offset(i + 1) - entry_offset;
-        OwnedRef key_value;
-        OwnedRef item_value;
 
-        std::cout << "*** row loop " << i << std::endl;
         // Build the new list object for the row of maps
-        list_item.reset(PyList_New(0)); //num_maps));
+        list_item.reset(PyList_New(0));
         RETURN_IF_PYERROR();
         
         // Add each key/item pair in the row
         for (int64_t j = 0; j < num_maps; ++j) {
 
-          std::cout << "*** map loop " << j << std::endl;
-
-          // Build the new tuple object for key and item
-          tuple_item.reset(PyTuple_New(2));
-          RETURN_IF_PYERROR();
-
           // Get key value, key is non-nullable for a valid row
           auto ptr_key = reinterpret_cast<const char*>(
               PyArray_GETPTR1(keys_array, chunk_offset + entry_offset + j));
           key_value.reset(PyArray_GETITEM(keys_array, ptr_key));
           RETURN_IF_PYERROR();
 
-          std::cout << "*** set key" << std::endl;
-          //PyObject* int_key = PyLong_FromLong(1L);
-          //PyTuple_SetItem(tuple_item.obj(), 0, int_key);
-          PyTuple_SetItem(tuple_item.obj(), 0, key_value.obj());
-          RETURN_IF_PYERROR();
-
-          std::cout << "*** item len: " << flat_items->length() << ", item index: " << (entry_offset + j) << std::endl;
-          
-          if (!item_arrays[c]->IsNull(entry_offset + j)) {
+          if (item_arrays[c]->IsNull(entry_offset + j)) {
+            // Translate the Null to a None
+            Py_INCREF(Py_None);
+            item_value.reset(Py_None);
+          } else {
             // Get valid value from item array
             auto ptr_item = reinterpret_cast<const char*>(
                 PyArray_GETPTR1(items_array, chunk_offset + entry_offset + j));
             item_value.reset(PyArray_GETITEM(items_array, ptr_item));
             RETURN_IF_PYERROR();
-          } else {
-            std::cout << "*** item is null: " << entry_offset + j << std::endl;
-            // Translate the Null to a None
-            Py_INCREF(Py_None);
-            item_value.reset(Py_None);
-            std::cout << "*** item set to null" << std::endl;
           }
 
-          std::cout << "*** set item" << std::endl;
-          //PyObject* int_item = PyLong_FromLong(2L);
-          //PyTuple_SetItem(tuple_item.obj(), 1, int_item);
-          PyTuple_SetItem(tuple_item.obj(), 1, item_value.obj());
-          RETURN_IF_PYERROR();
-
-          std::cout << "*** set list" << std::endl;
-          //Py_INCREF(tuple_item.obj());
           // Add the key/item pair to the list for the row
-          PyList_Append(list_item.obj(), tuple_item.obj());
+          PyList_Append(list_item.obj(),
+                        PyTuple_Pack(2, key_value.obj(), item_value.obj()));
           RETURN_IF_PYERROR();
-
-          std::cout << "*** end map loop" << std::endl;
         }
 
-
-        std::cout << "*** assign out_values" << std::endl;
-        
         *out_values = list_item.obj();
         // Grant ownership to the resulting array
         Py_INCREF(*out_values);
- 
-        /*if (*out_values == nullptr) {
-          // Fall out of loop, will return from RETURN_IF_PYERROR
-          break;
-        }*/
       }
       ++out_values;
     }
-
-    std::cout << "*** end chunk loop" << std::endl;
     RETURN_IF_PYERROR();
 
     chunk_offset += arr->values()->length();

From 82ead951259e22dc70b4725e4e4dfcd27a0fed22 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Sun, 4 Oct 2020 23:01:41 -0700
Subject: [PATCH 5/9] Added roundtrip and chunked tests

---
 python/pyarrow/tests/test_pandas.py | 37 +++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index e83a59b4a9e..70999d52704 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2132,17 +2132,46 @@ def test_auto_chunking_on_list_overflow(self):
         assert len(column_a.chunk(0)) == 2**24 - 1
         assert len(column_a.chunk(1)) == 1
 
-    def test_map_array(self):
+    def test_map_array_roundtrip(self):
+        data = [[(b'a', 1), (b'b', 2)],
+                [(b'c', 3)],
+                [(b'd', 4), (b'e', 5), (b'f', 6)],
+                [(b'g', 7)]]
+
+        df = pd.DataFrame({"map": data})
+        schema = pa.schema([("map", pa.map_(pa.binary(), pa.int32()))])
+
+        _check_pandas_roundtrip(df, schema=schema)
+
+    def test_map_array_chunked(self):
+        data1 = [[(b'a', 1), (b'b', 2)],
+                 [(b'c', 3)],
+                 [(b'd', 4), (b'e', 5), (b'f', 6)],
+                 [(b'g', 7)]]
+        data2 = [[(k, v * 2) for k, v in row] for row in data1]
+
+        arr1 = pa.array(data1, type=pa.map_(pa.binary(), pa.int32()))
+        arr2 = pa.array(data2, type=pa.map_(pa.binary(), pa.int32()))
+        arr = pa.chunked_array([arr1, arr2])
+
+        expected = pd.Series(data1 + data2)
+        actual = arr.to_pandas()
+        tm.assert_series_equal(actual, expected, check_names=False)
+
+    def test_map_array_with_nulls(self):
         data = [[(b'a', 1), (b'b', 2)],
                 None,
                 [(b'd', 4), (b'e', 5), (b'f', None)],
                 [(b'g', 7)]]
 
-        data = [[(b'a', 1)], [(b'b', 2)]]
+        # None value in item array causes upcast to float
+        expected = [[(k, float(v) if v is not None else None) for k, v in row]
+                    if row is not None else None for row in data]
+        expected = pd.Series(expected)
 
         arr = pa.array(data, type=pa.map_(pa.binary(), pa.int32()))
-        s = arr.to_pandas()
-        tm.assert_series_equal(s, pd.Series(data), check_names=False)
+        actual = arr.to_pandas()
+        tm.assert_series_equal(actual, expected, check_names=False)
 
 
 class TestConvertStructTypes:

From 43263a7007e8248114725333cc2300273299b119 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Mon, 5 Oct 2020 00:00:16 -0700
Subject: [PATCH 6/9] Fix lint errors

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 33 ++++++++++---------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index dafd29711db..2535333e1f2 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -791,7 +791,6 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
   return Status::OK();
 }
 
-
 inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
                          PyObject** out_values) {
   // Get columns of underlying key/item arrays
@@ -804,13 +803,11 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   }
 
   const auto& map_type = checked_cast<const MapType&>(*data.type());
-  //auto entry_type = map_type.value_type();
   const auto& key_type = map_type.key_type();
   const auto& item_type = map_type.item_type();
-  /*using ListArrayType = typename ListArrayT::TypeClass;
-  const auto& map_type = checked_cast<const ListArrayType&>(*data.type());
-  auto value_type = list_type.value_type();
 
+  // TODO: Is this needed for key/item?
+  /*
   if (value_type->id() == Type::DICTIONARY) {
     // ARROW-6899: Convert dictionary-encoded children to dense instead of
     // failing below. A more efficient conversion than this could be done later
@@ -819,8 +816,6 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
     value_type = dense_type;
   }*/
 
-  auto flat_keys = std::make_shared<ChunkedArray>(key_arrays, key_type);
-  auto flat_items = std::make_shared<ChunkedArray>(item_arrays, item_type);
   // TODO(ARROW-489): Currently we don't have a Python reference for single columns.
   //    Storing a reference to the whole Array would be too expensive.
 
@@ -832,6 +827,8 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   PandasOptions modified_options = options;
   modified_options.coerce_temporal_nanoseconds = false;
 
+  auto flat_keys = std::make_shared<ChunkedArray>(key_arrays, key_type);
+  auto flat_items = std::make_shared<ChunkedArray>(item_arrays, item_type);
   OwnedRef list_item;
   OwnedRef key_value;
   OwnedRef item_value;
@@ -841,8 +838,8 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   OwnedRefNoGIL owned_numpy_items;
   RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_items, nullptr,
                                             owned_numpy_items.ref()));
-  PyArrayObject* keys_array = reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
-  PyArrayObject* items_array = reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
+  PyArrayObject* py_keys = reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
+  PyArrayObject* py_items = reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
 
   int64_t chunk_offset = 0;
   for (int c = 0; c < data.num_chunks(); ++c) {
@@ -861,14 +858,13 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
         // Build the new list object for the row of maps
         list_item.reset(PyList_New(0));
         RETURN_IF_PYERROR();
-        
+
         // Add each key/item pair in the row
         for (int64_t j = 0; j < num_maps; ++j) {
-
           // Get key value, key is non-nullable for a valid row
           auto ptr_key = reinterpret_cast<const char*>(
-              PyArray_GETPTR1(keys_array, chunk_offset + entry_offset + j));
-          key_value.reset(PyArray_GETITEM(keys_array, ptr_key));
+              PyArray_GETPTR1(py_keys, chunk_offset + entry_offset + j));
+          key_value.reset(PyArray_GETITEM(py_keys, ptr_key));
           RETURN_IF_PYERROR();
 
           if (item_arrays[c]->IsNull(entry_offset + j)) {
@@ -878,8 +874,8 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
           } else {
             // Get valid value from item array
             auto ptr_item = reinterpret_cast<const char*>(
-                PyArray_GETPTR1(items_array, chunk_offset + entry_offset + j));
-            item_value.reset(PyArray_GETITEM(items_array, ptr_item));
+                PyArray_GETPTR1(py_items, chunk_offset + entry_offset + j));
+            item_value.reset(PyArray_GETITEM(py_items, ptr_item));
             RETURN_IF_PYERROR();
           }
 
@@ -903,7 +899,6 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   return Status::OK();
 }
 
-
 template <typename InType, typename OutType>
 inline void ConvertNumericNullable(const ChunkedArray& data, InType na_value,
                                    OutType* out_values) {
@@ -1140,9 +1135,7 @@ struct ObjectWriterVisitor {
     return ConvertListsLike<ArrayType>(options, data, out_values);
   }
 
-  Status Visit(const MapType& type) {
-    return ConvertMap(options, data, out_values);
-  }
+  Status Visit(const MapType& type) { return ConvertMap(options, data, out_values); }
 
   Status Visit(const StructType& type) {
     return ConvertStruct(options, data, out_values);
@@ -1918,7 +1911,7 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions&
     } break;
     case Type::FIXED_SIZE_LIST:
     case Type::LIST:
-    case Type::LARGE_LIST: 
+    case Type::LARGE_LIST:
     case Type::MAP: {
       auto list_type = std::static_pointer_cast<BaseListType>(data.type());
       if (!ListTypeSupported(*list_type->value_type())) {

From c469dcb264265fe7f74dd2e88c7c84ba23fad4ab Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Mon, 5 Oct 2020 10:44:17 -0700
Subject: [PATCH 7/9] Handle dictionary encoded arrays

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 27 ++++++++++++++-----------
 python/pyarrow/tests/test_pandas.py     | 13 ++++++++++++
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 2535333e1f2..012e72fd07c 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -803,18 +803,21 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   }
 
   const auto& map_type = checked_cast<const MapType&>(*data.type());
-  const auto& key_type = map_type.key_type();
-  const auto& item_type = map_type.item_type();
-
-  // TODO: Is this needed for key/item?
-  /*
-  if (value_type->id() == Type::DICTIONARY) {
-    // ARROW-6899: Convert dictionary-encoded children to dense instead of
-    // failing below. A more efficient conversion than this could be done later
-    auto dense_type = checked_cast<const DictionaryType&>(*value_type).value_type();
-    RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &value_arrays));
-    value_type = dense_type;
-  }*/
+  auto key_type = map_type.key_type();
+  auto item_type = map_type.item_type();
+
+  // ARROW-6899: Convert dictionary-encoded children to dense instead of
+  // failing below. A more efficient conversion than this could be done later
+  if (key_type->id() == Type::DICTIONARY) {
+    auto dense_type = checked_cast<const DictionaryType&>(*key_type).value_type();
+    RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &key_arrays));
+    key_type = dense_type;
+  }
+  if (item_type->id() == Type::DICTIONARY) {
+    auto dense_type = checked_cast<const DictionaryType&>(*item_type).value_type();
+    RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &item_arrays));
+    item_type = dense_type;
+  }
 
   // TODO(ARROW-489): Currently we don't have a Python reference for single columns.
   //    Storing a reference to the whole Array would be too expensive.
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 70999d52704..0e01dc08ef6 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2173,6 +2173,19 @@ def test_map_array_with_nulls(self):
         actual = arr.to_pandas()
         tm.assert_series_equal(actual, expected, check_names=False)
 
+    def test_map_array_dictionary_encoded(self):
+        offsets = pa.array([0, 3, 5])
+        items = pa.array(['a', 'b', 'c', 'a', 'd']).dictionary_encode()
+        keys = pa.array(list(range(len(items))))
+        arr = pa.MapArray.from_arrays(offsets, keys, items)
+
+        # Dictionary encoded values converted to dense
+        expected = pd.Series(
+            [[(0, 'a'), (1, 'b'), (2, 'c')], [(3, 'a'), (4, 'd')]])
+
+        actual = arr.to_pandas()
+        tm.assert_series_equal(actual, expected, check_names=False)
+
 
 class TestConvertStructTypes:
     """

From c2a804ef9525bc0c16c364ce2035895fe53e0d81 Mon Sep 17 00:00:00 2001
From: Bryan Cutler <cutlerb@gmail.com>
Date: Wed, 7 Oct 2020 00:14:06 -0700
Subject: [PATCH 8/9] Address feedback, rebased with recent changes

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 44 +++++++++++--------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 012e72fd07c..910feb8b23d 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -791,8 +791,8 @@ Status ConvertListsLike(PandasOptions options, const ChunkedArray& data,
   return Status::OK();
 }
 
-inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
-                         PyObject** out_values) {
+Status ConvertMap(PandasOptions options, const ChunkedArray& data,
+                  PyObject** out_values) {
   // Get columns of underlying key/item arrays
   std::vector<std::shared_ptr<Array>> key_arrays;
   std::vector<std::shared_ptr<Array>> item_arrays;
@@ -819,16 +819,10 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
     item_type = dense_type;
   }
 
-  // TODO(ARROW-489): Currently we don't have a Python reference for single columns.
-  //    Storing a reference to the whole Array would be too expensive.
-
-  // ARROW-3789(wesm): During refactoring I found that unit tests assumed that
-  // timestamp units would be preserved on list<timestamp UNIT> conversions in
-  // Table.to_pandas. So we set the option here to not coerce things to
-  // nanoseconds. Bit of a hack but this seemed the simplest thing to satisfy
-  // the existing unit tests
-  PandasOptions modified_options = options;
-  modified_options.coerce_temporal_nanoseconds = false;
+  // See notes in MakeInnerOptions.
+  options = MakeInnerOptions(std::move(options));
+  // Don't blindly convert because timestamps in lists are handled differently.
+  options.timestamp_as_object = true;
 
   auto flat_keys = std::make_shared<ChunkedArray>(key_arrays, key_type);
   auto flat_items = std::make_shared<ChunkedArray>(item_arrays, item_type);
@@ -836,30 +830,30 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
   OwnedRef key_value;
   OwnedRef item_value;
   OwnedRefNoGIL owned_numpy_keys;
-  RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_keys, nullptr,
+  RETURN_NOT_OK(ConvertChunkedArrayToPandas(options, flat_keys, nullptr,
                                             owned_numpy_keys.ref()));
   OwnedRefNoGIL owned_numpy_items;
-  RETURN_NOT_OK(ConvertChunkedArrayToPandas(modified_options, flat_items, nullptr,
+  RETURN_NOT_OK(ConvertChunkedArrayToPandas(options, flat_items, nullptr,
                                             owned_numpy_items.ref()));
   PyArrayObject* py_keys = reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
   PyArrayObject* py_items = reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
 
   int64_t chunk_offset = 0;
   for (int c = 0; c < data.num_chunks(); ++c) {
-    auto arr = std::static_pointer_cast<MapArray>(data.chunk(c));
+    const auto& arr = checked_cast<const MapArray&>(*data.chunk(c));
     const bool has_nulls = data.null_count() > 0;
 
     // Make a list of key/item pairs for each row in array
-    for (int64_t i = 0; i < arr->length(); ++i) {
-      if (has_nulls && arr->IsNull(i)) {
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      if (has_nulls && arr.IsNull(i)) {
         Py_INCREF(Py_None);
         *out_values = Py_None;
       } else {
-        int64_t entry_offset = arr->value_offset(i);
-        int64_t num_maps = arr->value_offset(i + 1) - entry_offset;
+        int64_t entry_offset = arr.value_offset(i);
+        int64_t num_maps = arr.value_offset(i + 1) - entry_offset;
 
         // Build the new list object for the row of maps
-        list_item.reset(PyList_New(0));
+        list_item.reset(PyList_New(num_maps));
         RETURN_IF_PYERROR();
 
         // Add each key/item pair in the row
@@ -883,20 +877,20 @@ inline Status ConvertMap(const PandasOptions& options, const ChunkedArray& data,
           }
 
           // Add the key/item pair to the list for the row
-          PyList_Append(list_item.obj(),
-                        PyTuple_Pack(2, key_value.obj(), item_value.obj()));
+          PyList_SET_ITEM(list_item.obj(), j,
+                          PyTuple_Pack(2, key_value.obj(), item_value.obj()));
           RETURN_IF_PYERROR();
         }
 
         *out_values = list_item.obj();
-        // Grant ownership to the resulting array
-        Py_INCREF(*out_values);
+        // Release ownership to the resulting array
+        list_item.detach();
       }
       ++out_values;
     }
     RETURN_IF_PYERROR();
 
-    chunk_offset += arr->values()->length();
+    chunk_offset += arr.values()->length();
   }
 
   return Status::OK();

From 090177e4ebe7c7c71f3460ad598c7ec5bd661ac5 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou <antoine@python.org>
Date: Wed, 7 Oct 2020 11:32:02 +0200
Subject: [PATCH 9/9] Nits & lint

---
 cpp/src/arrow/python/arrow_to_pandas.cc | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/cpp/src/arrow/python/arrow_to_pandas.cc
index 910feb8b23d..be27a108dd1 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -830,11 +830,11 @@ Status ConvertMap(PandasOptions options, const ChunkedArray& data,
   OwnedRef key_value;
   OwnedRef item_value;
   OwnedRefNoGIL owned_numpy_keys;
-  RETURN_NOT_OK(ConvertChunkedArrayToPandas(options, flat_keys, nullptr,
-                                            owned_numpy_keys.ref()));
+  RETURN_NOT_OK(
+      ConvertChunkedArrayToPandas(options, flat_keys, nullptr, owned_numpy_keys.ref()));
   OwnedRefNoGIL owned_numpy_items;
-  RETURN_NOT_OK(ConvertChunkedArrayToPandas(options, flat_items, nullptr,
-                                            owned_numpy_items.ref()));
+  RETURN_NOT_OK(
+      ConvertChunkedArrayToPandas(options, flat_items, nullptr, owned_numpy_items.ref()));
   PyArrayObject* py_keys = reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
   PyArrayObject* py_items = reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
 
@@ -882,9 +882,8 @@ Status ConvertMap(PandasOptions options, const ChunkedArray& data,
           RETURN_IF_PYERROR();
         }
 
-        *out_values = list_item.obj();
-        // Release ownership to the resulting array
-        list_item.detach();
+        // Pass ownership to the resulting array
+        *out_values = list_item.detach();
       }
       ++out_values;
     }