From d4a83912cd215857f90a29c3a16dbe6e437ddd93 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Fri, 12 May 2017 17:26:08 -0400
Subject: [PATCH 1/5] Reorganize Cython code in the style of lxml so make
 declaring a public C API easier

Change-Id: I3879a8f0546c88959f4468f69f41044455c7978a
---
 python/CMakeLists.txt                      |   6 +-
 python/pyarrow/__init__.pxd                |  33 +++++
 python/pyarrow/__init__.py                 | 108 ++++++++--------
 python/pyarrow/_error.pxd                  |  20 ---
 python/pyarrow/_io.pxd                     |  50 --------
 python/pyarrow/_memory.pxd                 |  30 -----
 python/pyarrow/_parquet.pyx                |   1 -
 python/pyarrow/_table.pxd                  |  62 ---------
 python/pyarrow/{_array.pyx => array.pxi}   | 140 +++++----------------
 python/pyarrow/{_error.pyx => error.pxi}   |   0
 python/pyarrow/filesystem.py               |  14 +--
 python/pyarrow/includes/libarrow.pxd       |  55 +++++++-
 python/pyarrow/includes/pyarrow.pxd        |  75 -----------
 python/pyarrow/{_io.pyx => io.pxi}         |  34 ++---
 python/pyarrow/ipc.py                      |  10 +-
 python/pyarrow/{_array.pxd => lib.pxd}     |  92 +++++++++++++-
 python/pyarrow/lib.pyx                     |  59 +++++++++
 python/pyarrow/{_memory.pyx => memory.pxi} |  11 +-
 python/pyarrow/public-api.pxi              |  97 ++++++++++++++
 python/pyarrow/{_table.pyx => table.pxi}   |  96 ++++----------
 python/setup.py                            |   8 +-
 21 files changed, 468 insertions(+), 533 deletions(-)
 create mode 100644 python/pyarrow/__init__.pxd
 delete mode 100644 python/pyarrow/_error.pxd
 delete mode 100644 python/pyarrow/_io.pxd
 delete mode 100644 python/pyarrow/_memory.pxd
 delete mode 100644 python/pyarrow/_table.pxd
 rename python/pyarrow/{_array.pyx => array.pxi} (91%)
 rename python/pyarrow/{_error.pyx => error.pxi} (100%)
 delete mode 100644 python/pyarrow/includes/pyarrow.pxd
 rename python/pyarrow/{_io.pyx => io.pxi} (97%)
 rename python/pyarrow/{_array.pxd => lib.pxd} (68%)
 create mode 100644 python/pyarrow/lib.pyx
 rename python/pyarrow/{_memory.pyx => memory.pxi} (82%)
 create mode 100644 python/pyarrow/public-api.pxi
 rename python/pyarrow/{_table.pyx => table.pxi} (90%)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index d8287108d4c..44b5aaf7063 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -265,12 +265,8 @@ if (UNIX)
 endif()
 
 set(CYTHON_EXTENSIONS
-  _array
   _config
-  _error
-  _io
-  _memory
-  _table
+  lib
 )
 
 set(LINK_LIBS
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
new file mode 100644
index 00000000000..be2603ac9a5
--- /dev/null
+++ b/python/pyarrow/__init__.pxd
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from libcpp.memory cimport shared_ptr
+from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField,
+                                        CRecordBatch, CSchema,
+                                        CTable, CTensor)
+
+cdef extern from "pyarrow.lib_api.h":
+    cdef int import_pyarrow__lib() except -1
+
+    cdef object wrap_data_type(const shared_ptr[CDataType]& type)
+    cdef object wrap_field(const shared_ptr[CField]& field)
+    cdef object wrap_schema(const shared_ptr[CSchema]& schema)
+    cdef object wrap_array(const shared_ptr[CArray]& sp_array)
+    cdef object wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
+    cdef object wrap_column(const shared_ptr[CColumn]& ccolumn)
+    cdef object wrap_table(const shared_ptr[CTable]& ctable)
+    cdef object wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 4d8da9f5a10..7017103f93a 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -28,50 +28,50 @@
 import pyarrow._config
 from pyarrow._config import cpu_count, set_cpu_count
 
-from pyarrow._array import (null, bool_,
-                            int8, int16, int32, int64,
-                            uint8, uint16, uint32, uint64,
-                            time32, time64, timestamp, date32, date64,
-                            float16, float32, float64,
-                            binary, string, decimal,
-                            list_, struct, dictionary, field,
-                            DataType,
-                            DecimalType,
-                            DictionaryType,
-                            FixedSizeBinaryType,
-                            TimestampType,
-                            Time32Type,
-                            Time64Type,
-                            Field,
-                            Schema,
-                            schema,
-                            Array, Tensor,
-                            array,
-                            from_numpy_dtype,
-                            NullArray,
-                            NumericArray, IntegerArray, FloatingPointArray,
-                            BooleanArray,
-                            Int8Array, UInt8Array,
-                            Int16Array, UInt16Array,
-                            Int32Array, UInt32Array,
-                            Int64Array, UInt64Array,
-                            ListArray,
-                            BinaryArray, StringArray,
-                            FixedSizeBinaryArray,
-                            DictionaryArray,
-                            Date32Array, Date64Array,
-                            TimestampArray, Time32Array, Time64Array,
-                            DecimalArray,
-                            ArrayValue, Scalar, NA, NAType,
-                            BooleanValue,
-                            Int8Value, Int16Value, Int32Value, Int64Value,
-                            UInt8Value, UInt16Value, UInt32Value, UInt64Value,
-                            FloatValue, DoubleValue, ListValue,
-                            BinaryValue, StringValue, FixedSizeBinaryValue,
-                            DecimalValue,
-                            Date32Value, Date64Value, TimestampValue)
+from pyarrow.lib import (null, bool_,
+                         int8, int16, int32, int64,
+                         uint8, uint16, uint32, uint64,
+                         time32, time64, timestamp, date32, date64,
+                         float16, float32, float64,
+                         binary, string, decimal,
+                         list_, struct, dictionary, field,
+                         DataType,
+                         DecimalType,
+                         DictionaryType,
+                         FixedSizeBinaryType,
+                         TimestampType,
+                         Time32Type,
+                         Time64Type,
+                         Field,
+                         Schema,
+                         schema,
+                         Array, Tensor,
+                         array,
+                         from_numpy_dtype,
+                         NullArray,
+                         NumericArray, IntegerArray, FloatingPointArray,
+                         BooleanArray,
+                         Int8Array, UInt8Array,
+                         Int16Array, UInt16Array,
+                         Int32Array, UInt32Array,
+                         Int64Array, UInt64Array,
+                         ListArray,
+                         BinaryArray, StringArray,
+                         FixedSizeBinaryArray,
+                         DictionaryArray,
+                         Date32Array, Date64Array,
+                         TimestampArray, Time32Array, Time64Array,
+                         DecimalArray,
+                         ArrayValue, Scalar, NA, NAType,
+                         BooleanValue,
+                         Int8Value, Int16Value, Int32Value, Int64Value,
+                         UInt8Value, UInt16Value, UInt32Value, UInt64Value,
+                         FloatValue, DoubleValue, ListValue,
+                         BinaryValue, StringValue, FixedSizeBinaryValue,
+                         DecimalValue,
+                         Date32Value, Date64Value, TimestampValue)
 
-from pyarrow._io import (HdfsFile, NativeFile, PythonFile,
+from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
                          Buffer, BufferReader, InMemoryOutputStream,
                          OSFile, MemoryMappedFile, memory_map,
                          frombuffer, read_tensor, write_tensor,
@@ -79,17 +79,17 @@
                          get_record_batch_size, get_tensor_size,
                          have_libhdfs, have_libhdfs3)
 
-from pyarrow._memory import (MemoryPool, total_allocated_bytes,
-                             set_memory_pool, default_memory_pool)
-from pyarrow._table import (ChunkedArray, Column, RecordBatch, Table,
-                            concat_tables)
-from pyarrow._error import (ArrowException,
-                            ArrowKeyError,
-                            ArrowInvalid,
-                            ArrowIOError,
-                            ArrowMemoryError,
-                            ArrowNotImplementedError,
-                            ArrowTypeError)
+from pyarrow.lib import (MemoryPool, total_allocated_bytes,
+                         set_memory_pool, default_memory_pool)
+from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
+                         concat_tables)
+from pyarrow.lib import (ArrowException,
+                         ArrowKeyError,
+                         ArrowInvalid,
+                         ArrowIOError,
+                         ArrowMemoryError,
+                         ArrowNotImplementedError,
+                         ArrowTypeError)
 
 
 def jemalloc_memory_pool():
diff --git a/python/pyarrow/_error.pxd b/python/pyarrow/_error.pxd
deleted file mode 100644
index 4fb46c25faf..00000000000
--- a/python/pyarrow/_error.pxd
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyarrow.includes.libarrow cimport CStatus
-
-cdef int check_status(const CStatus& status) nogil except -1
diff --git a/python/pyarrow/_io.pxd b/python/pyarrow/_io.pxd
deleted file mode 100644
index 0c37a09add5..00000000000
--- a/python/pyarrow/_io.pxd
+++ /dev/null
@@ -1,50 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport *
-
-
-cdef class Buffer:
-    cdef:
-        shared_ptr[CBuffer] buffer
-        Py_ssize_t shape[1]
-        Py_ssize_t strides[1]
-
-    cdef init(self, const shared_ptr[CBuffer]& buffer)
-
-
-cdef class NativeFile:
-    cdef:
-        shared_ptr[RandomAccessFile] rd_file
-        shared_ptr[OutputStream] wr_file
-        bint is_readable
-        bint is_writeable
-        bint is_open
-        bint own_file
-
-    # By implementing these "virtual" functions (all functions in Cython
-    # extension classes are technically virtual in the C++ sense) we can expose
-    # the arrow::io abstract file interfaces to other components throughout the
-    # suite of Arrow C++ libraries
-    cdef read_handle(self, shared_ptr[RandomAccessFile]* file)
-    cdef write_handle(self, shared_ptr[OutputStream]* file)
-
-cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader)
-cdef get_writer(object source, shared_ptr[OutputStream]* writer)
diff --git a/python/pyarrow/_memory.pxd b/python/pyarrow/_memory.pxd
deleted file mode 100644
index bb1af85c8ea..00000000000
--- a/python/pyarrow/_memory.pxd
+++ /dev/null
@@ -1,30 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyarrow.includes.libarrow cimport CMemoryPool, CLoggingMemoryPool
-
-
-cdef class MemoryPool:
-    cdef:
-        CMemoryPool* pool
-
-    cdef init(self, CMemoryPool* pool)
-
-cdef class LoggingMemoryPool(MemoryPool):
-    pass
-
-cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index c06eab26302..b0a029e6960 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -22,7 +22,6 @@
 from cython.operator cimport dereference as deref
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
-cimport pyarrow.includes.pyarrow as pyarrow
 from pyarrow._array cimport Array, Schema, box_schema
 from pyarrow._error cimport check_status
 from pyarrow._memory cimport MemoryPool, maybe_unbox_memory_pool
diff --git a/python/pyarrow/_table.pxd b/python/pyarrow/_table.pxd
deleted file mode 100644
index e61e90d7462..00000000000
--- a/python/pyarrow/_table.pxd
+++ /dev/null
@@ -1,62 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from pyarrow.includes.common cimport shared_ptr
-from pyarrow.includes.libarrow cimport (CChunkedArray, CColumn, CTable,
-                                        CRecordBatch)
-from pyarrow._array cimport Schema
-
-
-cdef class ChunkedArray:
-    cdef:
-        shared_ptr[CChunkedArray] sp_chunked_array
-        CChunkedArray* chunked_array
-
-    cdef init(self, const shared_ptr[CChunkedArray]& chunked_array)
-    cdef _check_nullptr(self)
-
-
-cdef class Column:
-    cdef:
-        shared_ptr[CColumn] sp_column
-        CColumn* column
-
-    cdef init(self, const shared_ptr[CColumn]& column)
-    cdef _check_nullptr(self)
-
-
-cdef class Table:
-    cdef:
-        shared_ptr[CTable] sp_table
-        CTable* table
-
-    cdef init(self, const shared_ptr[CTable]& table)
-    cdef _check_nullptr(self)
-
-
-cdef class RecordBatch:
-    cdef:
-        shared_ptr[CRecordBatch] sp_batch
-        CRecordBatch* batch
-        Schema _schema
-
-    cdef init(self, const shared_ptr[CRecordBatch]& table)
-    cdef _check_nullptr(self)
-
-cdef object box_column(const shared_ptr[CColumn]& ccolumn)
-cdef api object table_from_ctable(const shared_ptr[CTable]& ctable)
-cdef api object batch_from_cbatch(const shared_ptr[CRecordBatch]& cbatch)
diff --git a/python/pyarrow/_array.pyx b/python/pyarrow/array.pxi
similarity index 91%
rename from python/pyarrow/_array.pyx
rename to python/pyarrow/array.pxi
index f01cff6cc99..a115cfdb36c 100644
--- a/python/pyarrow/_array.pyx
+++ b/python/pyarrow/array.pxi
@@ -15,31 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from cython.operator cimport dereference as deref
 from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.common cimport PyObject_to_object
-cimport pyarrow.includes.pyarrow as pyarrow
-from pyarrow._error cimport check_status
-from pyarrow._memory cimport MemoryPool, maybe_unbox_memory_pool
-cimport cpython as cp
-
-
-import datetime
-import decimal as _pydecimal
-import numpy as np
-import six
-import pyarrow._config
-from pyarrow.compat import frombytes, tobytes, PandasSeries, Categorical
-
-
-cdef _pandas():
-    import pandas as pd
-    return pd
-
 
 # These are imprecise because the type (in pandas 0.x) depends on the presence
 # of nulls
@@ -186,7 +162,7 @@ cdef class Field:
     cdef init(self, const shared_ptr[CField]& field):
         self.sp_field = field
         self.field = field.get()
-        self.type = box_data_type(field.get().type())
+        self.type = wrap_data_type(field.get().type())
 
     def equals(self, Field other):
         """
@@ -244,7 +220,7 @@ cdef class Field:
         with nogil:
             check_status(self.field.AddMetadata(c_meta, &new_field))
 
-        return box_field(new_field)
+        return wrap_field(new_field)
 
     def remove_metadata(self):
         """
@@ -257,7 +233,7 @@ cdef class Field:
         cdef shared_ptr[CField] new_field
         with nogil:
             new_field = self.field.RemoveMetadata()
-        return box_field(new_field)
+        return wrap_field(new_field)
 
 
 cdef class Schema:
@@ -274,7 +250,7 @@ cdef class Schema:
 
         cdef Field result = Field()
         result.init(self.schema.field(i))
-        result.type = box_data_type(result.field.type())
+        result.type = wrap_data_type(result.field.type())
 
         return result
 
@@ -322,7 +298,7 @@ cdef class Schema:
         -------
         field: pyarrow.Field
         """
-        return box_field(self.schema.GetFieldByName(tobytes(name)))
+        return wrap_field(self.schema.GetFieldByName(tobytes(name)))
 
     def add_metadata(self, dict metadata):
         """
@@ -344,7 +320,7 @@ cdef class Schema:
         with nogil:
             check_status(self.schema.AddMetadata(c_meta, &new_schema))
 
-        return box_schema(new_schema)
+        return wrap_schema(new_schema)
 
     def remove_metadata(self):
         """
@@ -357,7 +333,7 @@ cdef class Schema:
         cdef shared_ptr[CSchema] new_schema
         with nogil:
             new_schema = self.schema.RemoveMetadata()
-        return box_schema(new_schema)
+        return wrap_schema(new_schema)
 
     def __str__(self):
         return frombytes(self.schema.ToString())
@@ -383,7 +359,7 @@ cdef DataType primitive_type(Type type):
         return _type_cache[type]
 
     cdef DataType out = DataType()
-    out.init(pyarrow.GetPrimitiveType(type))
+    out.init(GetPrimitiveType(type))
 
     _type_cache[type] = out
     return out
@@ -604,7 +580,7 @@ def float64():
 cpdef DataType decimal(int precision, int scale=0):
     cdef shared_ptr[CDataType] decimal_type
     decimal_type.reset(new CDecimalType(precision, scale))
-    return box_data_type(decimal_type)
+    return wrap_data_type(decimal_type)
 
 
 def string():
@@ -629,7 +605,7 @@ def binary(int length=-1):
 
     cdef shared_ptr[CDataType] fixed_size_binary_type
     fixed_size_binary_type.reset(new CFixedSizeBinaryType(length))
-    return box_data_type(fixed_size_binary_type)
+    return wrap_data_type(fixed_size_binary_type)
 
 
 def list_(DataType value_type):
@@ -695,49 +671,15 @@ def schema(fields):
     return result
 
 
-cdef DataType box_data_type(const shared_ptr[CDataType]& type):
-    cdef:
-        DataType out
-
-    if type.get() == NULL:
-        return None
-
-    if type.get().id() == _Type_DICTIONARY:
-        out = DictionaryType()
-    elif type.get().id() == _Type_TIMESTAMP:
-        out = TimestampType()
-    elif type.get().id() == _Type_FIXED_SIZE_BINARY:
-        out = FixedSizeBinaryType()
-    elif type.get().id() == _Type_DECIMAL:
-        out = DecimalType()
-    else:
-        out = DataType()
-
-    out.init(type)
-    return out
-
-cdef Field box_field(const shared_ptr[CField]& field):
-    if field.get() == NULL:
-        return None
-    cdef Field out = Field()
-    out.init(field)
-    return out
-
-cdef Schema box_schema(const shared_ptr[CSchema]& type):
-    cdef Schema out = Schema()
-    out.init_schema(type)
-    return out
-
-
 def from_numpy_dtype(object dtype):
     """
     Convert NumPy dtype to pyarrow.DataType
     """
     cdef shared_ptr[CDataType] c_type
     with nogil:
-        check_status(pyarrow.NumPyDtypeToArrow(dtype, &c_type))
+        check_status(NumPyDtypeToArrow(dtype, &c_type))
 
-    return box_data_type(c_type)
+    return wrap_data_type(c_type)
 
 
 NA = None
@@ -960,7 +902,7 @@ cdef class ListValue(ArrayValue):
     cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
         self.sp_array = sp_array
         self.ap = <CListArray*> sp_array.get()
-        self.value_type = box_data_type(self.ap.value_type())
+        self.value_type = wrap_data_type(self.ap.value_type())
 
     cdef getitem(self, int64_t i):
         cdef int64_t j = self.ap.value_offset(self.index) + i
@@ -1076,15 +1018,15 @@ def array(object sequence, DataType type=None, MemoryPool memory_pool=None):
 
     pool = maybe_unbox_memory_pool(memory_pool)
     if type is None:
-        check_status(pyarrow.ConvertPySequence(sequence, pool, &sp_array))
+        check_status(ConvertPySequence(sequence, pool, &sp_array))
     else:
         check_status(
-            pyarrow.ConvertPySequence(
+            ConvertPySequence(
                 sequence, pool, &sp_array, type.sp_type
             )
         )
 
-    return box_array(sp_array)
+    return wrap_array(sp_array)
 
 
 
@@ -1093,7 +1035,7 @@ cdef class Array:
     cdef init(self, const shared_ptr[CArray]& sp_array):
         self.sp_array = sp_array
         self.ap = sp_array.get()
-        self.type = box_data_type(self.sp_array.get().type())
+        self.type = wrap_data_type(self.sp_array.get().type())
 
     @staticmethod
     def from_pandas(obj, mask=None, DataType type=None,
@@ -1172,22 +1114,22 @@ cdef class Array:
             if type is not None:
                 c_type = type.sp_type
             with nogil:
-                check_status(pyarrow.PandasObjectsToArrow(
+                check_status(PandasObjectsToArrow(
                     pool, values, mask, c_type, &out))
         else:
             values, type = maybe_coerce_datetime64(
                 values, obj.dtype, type, timestamps_to_ms=timestamps_to_ms)
 
             if type is None:
-                check_status(pyarrow.NumPyDtypeToArrow(values.dtype, &c_type))
+                check_status(NumPyDtypeToArrow(values.dtype, &c_type))
             else:
                 c_type = type.sp_type
 
             with nogil:
-                check_status(pyarrow.PandasToArrow(
+                check_status(PandasToArrow(
                     pool, values, mask, c_type, &out))
 
-        return box_array(out)
+        return wrap_array(out)
 
     property null_count:
 
@@ -1271,7 +1213,7 @@ cdef class Array:
         else:
             result = self.ap.Slice(offset, length)
 
-        return box_array(result)
+        return wrap_array(result)
 
     def to_pandas(self):
         """
@@ -1287,8 +1229,7 @@ cdef class Array:
             PyObject* out
 
         with nogil:
-            check_status(
-                pyarrow.ConvertArrayToPandas(self.sp_array, self, &out))
+            check_status(ConvertArrayToPandas(self.sp_array, self, &out))
         return wrap_array_output(out)
 
     def to_pylist(self):
@@ -1303,7 +1244,7 @@ cdef class Tensor:
     cdef init(self, const shared_ptr[CTensor]& sp_tensor):
         self.sp_tensor = sp_tensor
         self.tp = sp_tensor.get()
-        self.type = box_data_type(self.tp.type())
+        self.type = wrap_data_type(self.tp.type())
 
     def __repr__(self):
         return """<pyarrow.Tensor>
@@ -1314,9 +1255,8 @@ strides: {2}""".format(self.type, self.shape, self.strides)
     @staticmethod
     def from_numpy(obj):
         cdef shared_ptr[CTensor] ctensor
-        check_status(pyarrow.NdarrayToTensor(default_memory_pool(),
-                                             obj, &ctensor))
-        return box_tensor(ctensor)
+        check_status(NdarrayToTensor(c_default_memory_pool(), obj, &ctensor))
+        return wrap_tensor(ctensor)
 
     def to_numpy(self):
         """
@@ -1325,7 +1265,7 @@ strides: {2}""".format(self.type, self.shape, self.strides)
         cdef:
             PyObject* out
 
-        check_status(pyarrow.TensorToNdarray(deref(self.tp), self, &out))
+        check_status(TensorToNdarray(deref(self.tp), self, &out))
         return PyObject_to_object(out)
 
     def equals(self, Tensor other):
@@ -1502,7 +1442,7 @@ cdef class DictionaryArray(Array):
             cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
 
             if self._dictionary is None:
-                self._dictionary = box_array(darr.dictionary())
+                self._dictionary = wrap_array(darr.dictionary())
 
             return self._dictionary
 
@@ -1512,7 +1452,7 @@ cdef class DictionaryArray(Array):
             cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
 
             if self._indices is None:
-                self._indices = box_array(darr.indices())
+                self._indices = wrap_array(darr.indices())
 
             return self._indices
 
@@ -1597,28 +1537,6 @@ cdef dict _array_classes = {
     _Type_DECIMAL: DecimalArray,
 }
 
-cdef object box_array(const shared_ptr[CArray]& sp_array):
-    if sp_array.get() == NULL:
-        raise ValueError('Array was NULL')
-
-    cdef CDataType* data_type = sp_array.get().type().get()
-
-    if data_type == NULL:
-        raise ValueError('Array data type was NULL')
-
-    cdef Array arr = _array_classes[data_type.id()]()
-    arr.init(sp_array)
-    return arr
-
-
-cdef object box_tensor(const shared_ptr[CTensor]& sp_tensor):
-    if sp_tensor.get() == NULL:
-        raise ValueError('Tensor was NULL')
-
-    cdef Tensor tensor = Tensor()
-    tensor.init(sp_tensor)
-    return tensor
-
 
 cdef object get_series_values(object obj):
     if isinstance(obj, PandasSeries):
diff --git a/python/pyarrow/_error.pyx b/python/pyarrow/error.pxi
similarity index 100%
rename from python/pyarrow/_error.pyx
rename to python/pyarrow/error.pxi
diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py
index 92dd91ce9de..ac37fd87294 100644
--- a/python/pyarrow/filesystem.py
+++ b/python/pyarrow/filesystem.py
@@ -19,7 +19,7 @@
 import os
 
 from pyarrow.util import implements
-import pyarrow._io as io
+import pyarrow.lib as lib
 
 
 class Filesystem(object):
@@ -133,7 +133,7 @@ def open(self, path, mode='rb'):
         return open(path, mode=mode)
 
 
-class HdfsClient(io._HdfsClient, Filesystem):
+class HdfsClient(lib._HdfsClient, Filesystem):
     """
     Connect to an HDFS cluster. All parameters are optional and should
     only be set if the defaults need to be overridden.
@@ -168,19 +168,19 @@ def __init__(self, host="default", port=0, user=None, kerb_ticket=None,
 
     @implements(Filesystem.isdir)
     def isdir(self, path):
-        return io._HdfsClient.isdir(self, path)
+        return lib._HdfsClient.isdir(self, path)
 
     @implements(Filesystem.isfile)
     def isfile(self, path):
-        return io._HdfsClient.isfile(self, path)
+        return lib._HdfsClient.isfile(self, path)
 
     @implements(Filesystem.delete)
     def delete(self, path, recursive=False):
-        return io._HdfsClient.delete(self, path, recursive)
+        return lib._HdfsClient.delete(self, path, recursive)
 
     @implements(Filesystem.mkdir)
     def mkdir(self, path, create_parents=True):
-        return io._HdfsClient.mkdir(self, path)
+        return lib._HdfsClient.mkdir(self, path)
 
     def ls(self, path, full_info=False):
         """
@@ -196,4 +196,4 @@ def ls(self, path, full_info=False):
         -------
         result : list of dicts (full_info=True) or strings (full_info=False)
         """
-        return io._HdfsClient.ls(self, path, full_info)
+        return lib._HdfsClient.ls(self, path, full_info)
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8a730b39884..9ed13b3e898 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -149,7 +149,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         PoolBuffer()
         PoolBuffer(CMemoryPool*)
 
-    cdef CMemoryPool* default_memory_pool()
+    cdef CMemoryPool* c_default_memory_pool" arrow::default_memory_pool"()
 
     cdef cppclass CListType" arrow::ListType"(CDataType):
         CListType(const shared_ptr[CDataType]& value_type)
@@ -625,3 +625,56 @@ cdef extern from "arrow/ipc/feather.h" namespace "arrow::ipc::feather" nogil:
 
         CStatus GetColumn(int i, shared_ptr[CColumn]* out)
         c_string GetColumnName(int i)
+
+
+cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
+    shared_ptr[CDataType] GetPrimitiveType(Type type)
+    shared_ptr[CDataType] GetTimestampType(TimeUnit unit)
+    CStatus ConvertPySequence(object obj, CMemoryPool* pool,
+                              shared_ptr[CArray]* out)
+    CStatus ConvertPySequence(object obj, CMemoryPool* pool,
+                              shared_ptr[CArray]* out,
+                              const shared_ptr[CDataType]& type)
+
+    CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
+
+    CStatus PandasToArrow(CMemoryPool* pool, object ao, object mo,
+                          const shared_ptr[CDataType]& type,
+                          shared_ptr[CArray]* out)
+
+    CStatus PandasObjectsToArrow(CMemoryPool* pool, object ao, object mo,
+                                 const shared_ptr[CDataType]& type,
+                                 shared_ptr[CArray]* out)
+
+    CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
+                            shared_ptr[CTensor]* out);
+
+    CStatus TensorToNdarray(const CTensor& tensor, object base,
+                            PyObject** out)
+
+    CStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr,
+                                 object py_ref, PyObject** out)
+
+    CStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr,
+                                  object py_ref, PyObject** out)
+
+    CStatus ConvertTableToPandas(const shared_ptr[CTable]& table,
+                                 int nthreads, PyObject** out)
+
+    void c_set_default_memory_pool \
+        " arrow::py::set_default_memory_pool"(CMemoryPool* pool)\
+
+    CMemoryPool* c_get_memory_pool \
+        " arrow::py::get_memory_pool"()
+
+    cdef cppclass PyBuffer(CBuffer):
+        PyBuffer(object o)
+
+    cdef cppclass PyReadableFile(RandomAccessFile):
+        PyReadableFile(object fo)
+
+    cdef cppclass PyOutputStream(OutputStream):
+        PyOutputStream(object fo)
+
+    cdef cppclass PyBytesReader(CBufferReader):
+        PyBytesReader(object fo)
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
deleted file mode 100644
index 35c71107f8d..00000000000
--- a/python/pyarrow/includes/pyarrow.pxd
+++ /dev/null
@@ -1,75 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, CDataType,
-                                        CTable, CTensor, CStatus, Type,
-                                        CMemoryPool, TimeUnit,
-                                        RandomAccessFile, OutputStream,
-                                        CBufferReader)
-
-
-cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
-    shared_ptr[CDataType] GetPrimitiveType(Type type)
-    shared_ptr[CDataType] GetTimestampType(TimeUnit unit)
-    CStatus ConvertPySequence(object obj, CMemoryPool* pool,
-                              shared_ptr[CArray]* out)
-    CStatus ConvertPySequence(object obj, CMemoryPool* pool,
-                              shared_ptr[CArray]* out,
-                              const shared_ptr[CDataType]& type)
-
-    CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
-
-    CStatus PandasToArrow(CMemoryPool* pool, object ao, object mo,
-                          const shared_ptr[CDataType]& type,
-                          shared_ptr[CArray]* out)
-
-    CStatus PandasObjectsToArrow(CMemoryPool* pool, object ao, object mo,
-                                 const shared_ptr[CDataType]& type,
-                                 shared_ptr[CArray]* out)
-
-    CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
-                            shared_ptr[CTensor]* out);
-
-    CStatus TensorToNdarray(const CTensor& tensor, object base,
-                            PyObject** out)
-
-    CStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr,
-                                 object py_ref, PyObject** out)
-
-    CStatus ConvertColumnToPandas(const shared_ptr[CColumn]& arr,
-                                  object py_ref, PyObject** out)
-
-    CStatus ConvertTableToPandas(const shared_ptr[CTable]& table,
-                                 int nthreads, PyObject** out)
-
-    void set_default_memory_pool(CMemoryPool* pool)
-    CMemoryPool* get_memory_pool()
-
-    cdef cppclass PyBuffer(CBuffer):
-        PyBuffer(object o)
-
-    cdef cppclass PyReadableFile(RandomAccessFile):
-        PyReadableFile(object fo)
-
-    cdef cppclass PyOutputStream(OutputStream):
-        PyOutputStream(object fo)
-
-    cdef cppclass PyBytesReader(CBufferReader):
-        PyBytesReader(object fo)
diff --git a/python/pyarrow/_io.pyx b/python/pyarrow/io.pxi
similarity index 97%
rename from python/pyarrow/_io.pyx
rename to python/pyarrow/io.pxi
index e9e2ba01c06..13e8b638723 100644
--- a/python/pyarrow/_io.pyx
+++ b/python/pyarrow/io.pxi
@@ -18,22 +18,7 @@
 # Cython wrappers for IO interfaces defined in arrow::io and messaging in
 # arrow::ipc
 
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from cython.operator cimport dereference as deref
 from libc.stdlib cimport malloc, free
-from pyarrow.includes.libarrow cimport *
-cimport pyarrow.includes.pyarrow as pyarrow
-from pyarrow._array cimport Array, Tensor, box_tensor, Schema
-from pyarrow._error cimport check_status
-from pyarrow._memory cimport MemoryPool, maybe_unbox_memory_pool
-from pyarrow._table cimport (Column, RecordBatch, batch_from_cbatch,
-                             table_from_ctable)
-cimport cpython as cp
-
-import pyarrow._config
 from pyarrow.compat import frombytes, tobytes, encode_file_path
 
 import re
@@ -52,6 +37,7 @@ cdef extern from "Python.h":
     PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"(
         char *v, Py_ssize_t len) except NULL
 
+
 cdef class NativeFile:
 
     def __cinit__(self):
@@ -315,11 +301,11 @@ cdef class PythonFile(NativeFile):
         self.handle = handle
 
         if mode.startswith('w'):
-            self.wr_file.reset(new pyarrow.PyOutputStream(handle))
+            self.wr_file.reset(new PyOutputStream(handle))
             self.is_readable = 0
             self.is_writeable = 1
         elif mode.startswith('r'):
-            self.rd_file.reset(new pyarrow.PyReadableFile(handle))
+            self.rd_file.reset(new PyReadableFile(handle))
             self.is_readable = 1
             self.is_writeable = 0
         else:
@@ -554,7 +540,7 @@ cdef class BufferReader(NativeFile):
 
     Parameters
     ----------
-    obj : Python bytes or pyarrow.io.Buffer
+    obj : Python bytes or pyarrow.Buffer
     """
     cdef:
         Buffer buffer
@@ -579,7 +565,7 @@ def frombuffer(object obj):
     cdef shared_ptr[CBuffer] buf
     try:
         memoryview(obj)
-        buf.reset(new pyarrow.PyBuffer(obj))
+        buf.reset(new PyBuffer(obj))
         return wrap_buffer(buf)
     except TypeError:
         raise ValueError('Must pass object that implements buffer protocol')
@@ -1007,7 +993,7 @@ cdef class _StreamReader:
         if batch.get() == NULL:
             raise StopIteration
 
-        return batch_from_cbatch(batch)
+        return wrap_batch(batch)
 
     def read_all(self):
         """
@@ -1027,7 +1013,7 @@ cdef class _StreamReader:
 
             check_status(CTable.FromRecordBatches(batches, &table))
 
-        return table_from_ctable(table)
+        return wrap_table(table)
 
 
 cdef class _FileWriter(_StreamWriter):
@@ -1080,7 +1066,7 @@ cdef class _FileReader:
         with nogil:
             check_status(self.reader.get().GetRecordBatch(i, &batch))
 
-        return batch_from_cbatch(batch)
+        return wrap_batch(batch)
 
     # TODO(wesm): ARROW-503: Function was renamed. Remove after a period of
     # time has passed
@@ -1103,7 +1089,7 @@ cdef class _FileReader:
                 check_status(self.reader.get().GetRecordBatch(i, &batches[i]))
             check_status(CTable.FromRecordBatches(batches, &table))
 
-        return table_from_ctable(table)
+        return wrap_table(table)
 
 
 #----------------------------------------------------------------------
@@ -1271,4 +1257,4 @@ def read_tensor(NativeFile source):
     with nogil:
         check_status(ReadTensor(offset, source.rd_file.get(), &sp_tensor))
 
-    return box_tensor(sp_tensor)
+    return wrap_tensor(sp_tensor)
diff --git a/python/pyarrow/ipc.py b/python/pyarrow/ipc.py
index f96ead3b923..c37a1ce7df1 100644
--- a/python/pyarrow/ipc.py
+++ b/python/pyarrow/ipc.py
@@ -17,10 +17,10 @@
 
 # Arrow file and stream reader/writer classes, and other messaging tools
 
-import pyarrow._io as _io
+import pyarrow.lib as lib
 
 
-class StreamReader(_io._StreamReader):
+class StreamReader(lib._StreamReader):
     """
     Reader for the Arrow streaming binary format
 
@@ -37,7 +37,7 @@ def __iter__(self):
             yield self.get_next_batch()
 
 
-class StreamWriter(_io._StreamWriter):
+class StreamWriter(lib._StreamWriter):
     """
     Writer for the Arrow streaming binary format
 
@@ -52,7 +52,7 @@ def __init__(self, sink, schema):
         self._open(sink, schema)
 
 
-class FileReader(_io._FileReader):
+class FileReader(lib._FileReader):
     """
     Class for reading Arrow record batch data from the Arrow binary file format
 
@@ -68,7 +68,7 @@ def __init__(self, source, footer_offset=None):
         self._open(source, footer_offset=footer_offset)
 
 
-class FileWriter(_io._FileWriter):
+class FileWriter(lib._FileWriter):
     """
     Writer to create the Arrow binary file format
 
diff --git a/python/pyarrow/_array.pxd b/python/pyarrow/lib.pxd
similarity index 68%
rename from python/pyarrow/_array.pxd
rename to python/pyarrow/lib.pxd
index 464de316f04..08d4a5d0e70 100644
--- a/python/pyarrow/_array.pxd
+++ b/python/pyarrow/lib.pxd
@@ -17,13 +17,32 @@
 
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
-
 from cpython cimport PyObject
 
 cdef extern from "Python.h":
     int PySlice_Check(object)
 
 
+from pyarrow.includes.libarrow cimport CStatus
+
+
+cdef int check_status(const CStatus& status) nogil except -1
+
+
+cdef class MemoryPool:
+    cdef:
+        CMemoryPool* pool
+
+    cdef init(self, CMemoryPool* pool)
+
+
+cdef class LoggingMemoryPool(MemoryPool):
+    pass
+
+
+cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool)
+
+
 cdef class DataType:
     cdef:
         shared_ptr[CDataType] sp_type
@@ -237,11 +256,72 @@ cdef class DictionaryArray(Array):
 
 
 cdef wrap_array_output(PyObject* output)
-cdef DataType box_data_type(const shared_ptr[CDataType]& type)
-cdef Field box_field(const shared_ptr[CField]& field)
-cdef Schema box_schema(const shared_ptr[CSchema]& schema)
-cdef object box_array(const shared_ptr[CArray]& sp_array)
-cdef object box_tensor(const shared_ptr[CTensor]& sp_tensor)
 cdef object box_scalar(DataType type,
                        const shared_ptr[CArray]& sp_array,
                        int64_t index)
+
+
+cdef class ChunkedArray:
+    cdef:
+        shared_ptr[CChunkedArray] sp_chunked_array
+        CChunkedArray* chunked_array
+
+    cdef init(self, const shared_ptr[CChunkedArray]& chunked_array)
+    cdef _check_nullptr(self)
+
+
+cdef class Column:
+    cdef:
+        shared_ptr[CColumn] sp_column
+        CColumn* column
+
+    cdef init(self, const shared_ptr[CColumn]& column)
+    cdef _check_nullptr(self)
+
+
+cdef class Table:
+    cdef:
+        shared_ptr[CTable] sp_table
+        CTable* table
+
+    cdef init(self, const shared_ptr[CTable]& table)
+    cdef _check_nullptr(self)
+
+
+cdef class RecordBatch:
+    cdef:
+        shared_ptr[CRecordBatch] sp_batch
+        CRecordBatch* batch
+        Schema _schema
+
+    cdef init(self, const shared_ptr[CRecordBatch]& table)
+    cdef _check_nullptr(self)
+
+
+cdef class Buffer:
+    cdef:
+        shared_ptr[CBuffer] buffer
+        Py_ssize_t shape[1]
+        Py_ssize_t strides[1]
+
+    cdef init(self, const shared_ptr[CBuffer]& buffer)
+
+
+cdef class NativeFile:
+    cdef:
+        shared_ptr[RandomAccessFile] rd_file
+        shared_ptr[OutputStream] wr_file
+        bint is_readable
+        bint is_writeable
+        bint is_open
+        bint own_file
+
+    # By implementing these "virtual" functions (all functions in Cython
+    # extension classes are technically virtual in the C++ sense) we can expose
+    # the arrow::io abstract file interfaces to other components throughout the
+    # suite of Arrow C++ libraries
+    cdef read_handle(self, shared_ptr[RandomAccessFile]* file)
+    cdef write_handle(self, shared_ptr[OutputStream]* file)
+
+cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader)
+cdef get_writer(object source, shared_ptr[OutputStream]* writer)
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
new file mode 100644
index 00000000000..b47630459e4
--- /dev/null
+++ b/python/pyarrow/lib.pyx
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+from cython.operator cimport dereference as deref
+from pyarrow.includes.libarrow cimport *
+from pyarrow.includes.common cimport PyObject_to_object
+cimport pyarrow.includes.libarrow as libarrow
+cimport cpython as cp
+
+
+import datetime
+import decimal as _pydecimal
+import numpy as np
+import six
+import pyarrow._config
+from pyarrow.compat import frombytes, tobytes, PandasSeries, Categorical
+
+
+cdef _pandas():
+    import pandas as pd
+    return pd
+
+# Exception types
+include "error.pxi"
+
+# Memory pools and allocation
+include "memory.pxi"
+
+# Array types
+include "array.pxi"
+
+# Column, Table, Record Batch
+include "table.pxi"
+
+# File IO, IPC
+include "io.pxi"
+
+#----------------------------------------------------------------------
+# Public API
+
+include "public-api.pxi"
diff --git a/python/pyarrow/_memory.pyx b/python/pyarrow/memory.pxi
similarity index 82%
rename from python/pyarrow/_memory.pyx
rename to python/pyarrow/memory.pxi
index 8b73a17553e..15d59d237ad 100644
--- a/python/pyarrow/_memory.pyx
+++ b/python/pyarrow/memory.pxi
@@ -19,9 +19,6 @@
 # distutils: language = c++
 # cython: embedsignature = True
 
-from pyarrow.includes.libarrow cimport CMemoryPool, CLoggingMemoryPool
-from pyarrow.includes.pyarrow cimport set_default_memory_pool, get_memory_pool
-
 
 cdef class MemoryPool:
     cdef init(self, CMemoryPool* pool):
@@ -33,7 +30,7 @@ cdef class MemoryPool:
 
 cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
     if memory_pool is None:
-        return get_memory_pool()
+        return c_get_memory_pool()
     else:
         return memory_pool.pool
 
@@ -45,14 +42,14 @@ cdef class LoggingMemoryPool(MemoryPool):
 def default_memory_pool():
     cdef:
         MemoryPool pool = MemoryPool()
-    pool.init(get_memory_pool())
+    pool.init(c_get_memory_pool())
     return pool
 
 
 def set_memory_pool(MemoryPool pool):
-    set_default_memory_pool(pool.pool)
+    c_set_default_memory_pool(pool.pool)
 
 
 def total_allocated_bytes():
-    cdef CMemoryPool* pool = get_memory_pool()
+    cdef CMemoryPool* pool = c_get_memory_pool()
     return pool.bytes_allocated()
diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi
new file mode 100644
index 00000000000..32b27ac93a0
--- /dev/null
+++ b/python/pyarrow/public-api.pxi
@@ -0,0 +1,97 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from libcpp.memory cimport shared_ptr
+from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField,
+                                        CRecordBatch, CSchema,
+                                        CTable, CTensor)
+
+cdef public api object wrap_data_type(const shared_ptr[CDataType]& type):
+    cdef:
+        DataType out
+
+    if type.get() == NULL:
+        return None
+
+    if type.get().id() == _Type_DICTIONARY:
+        out = DictionaryType()
+    elif type.get().id() == _Type_TIMESTAMP:
+        out = TimestampType()
+    elif type.get().id() == _Type_FIXED_SIZE_BINARY:
+        out = FixedSizeBinaryType()
+    elif type.get().id() == _Type_DECIMAL:
+        out = DecimalType()
+    else:
+        out = DataType()
+
+    out.init(type)
+    return out
+
+
+cdef public api object wrap_field(const shared_ptr[CField]& field):
+    if field.get() == NULL:
+        return None
+    cdef Field out = Field()
+    out.init(field)
+    return out
+
+
+cdef public api object wrap_schema(const shared_ptr[CSchema]& type):
+    cdef Schema out = Schema()
+    out.init_schema(type)
+    return out
+
+
+cdef public api object wrap_array(const shared_ptr[CArray]& sp_array):
+    if sp_array.get() == NULL:
+        raise ValueError('Array was NULL')
+
+    cdef CDataType* data_type = sp_array.get().type().get()
+
+    if data_type == NULL:
+        raise ValueError('Array data type was NULL')
+
+    cdef Array arr = _array_classes[data_type.id()]()
+    arr.init(sp_array)
+    return arr
+
+
+cdef public api object wrap_tensor(const shared_ptr[CTensor]& sp_tensor):
+    if sp_tensor.get() == NULL:
+        raise ValueError('Tensor was NULL')
+
+    cdef Tensor tensor = Tensor()
+    tensor.init(sp_tensor)
+    return tensor
+
+
+cdef public api object wrap_column(const shared_ptr[CColumn]& ccolumn):
+    cdef Column column = Column()
+    column.init(ccolumn)
+    return column
+
+
+cdef public api object wrap_table(const shared_ptr[CTable]& ctable):
+    cdef Table table = Table()
+    table.init(ctable)
+    return table
+
+
+cdef public api object wrap_batch(const shared_ptr[CRecordBatch]& cbatch):
+    cdef RecordBatch batch = RecordBatch()
+    batch.init(cbatch)
+    return batch
diff --git a/python/pyarrow/_table.pyx b/python/pyarrow/table.pxi
similarity index 90%
rename from python/pyarrow/_table.pyx
rename to python/pyarrow/table.pxi
index 223fe27ea98..a78817b4aff 100644
--- a/python/pyarrow/_table.pyx
+++ b/python/pyarrow/table.pxi
@@ -15,33 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# cython: profile=False
-# distutils: language = c++
-# cython: embedsignature = True
-
-from cython.operator cimport dereference as deref
-
-from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.common cimport *
-cimport pyarrow.includes.pyarrow as pyarrow
-from pyarrow._array cimport (Array, box_array, wrap_array_output,
-                             box_data_type, box_schema, DataType, Field)
-from pyarrow._error cimport check_status
-cimport cpython
-
-import pyarrow._config
-from pyarrow._error import ArrowException
-from pyarrow._array import field
-from pyarrow.compat import frombytes, tobytes
-
 from collections import OrderedDict
 
 
-cdef _pandas():
-    import pandas as pd
-    return pd
-
-
 cdef class ChunkedArray:
     """
     Array backed via one or more memory chunks.
@@ -104,10 +80,10 @@ cdef class ChunkedArray:
 
         Returns
         -------
-        pyarrow.array.Array
+        pyarrow.Array
         """
         self._check_nullptr()
-        return box_array(self.chunked_array.chunk(i))
+        return wrap_array(self.chunked_array.chunk(i))
 
     def iterchunks(self):
         for i in range(self.num_chunks):
@@ -150,7 +126,7 @@ cdef class Column:
 
         cdef shared_ptr[CColumn] sp_column
         sp_column.reset(new CColumn(boxed_field.sp_field, arr.sp_array))
-        return box_column(sp_column)
+        return wrap_column(sp_column)
 
     def to_pandas(self):
         """
@@ -163,8 +139,8 @@ cdef class Column:
         cdef:
             PyObject* out
 
-        check_status(pyarrow.ConvertColumnToPandas(self.sp_column,
-                                                   self, &out))
+        check_status(libarrow.ConvertColumnToPandas(self.sp_column,
+                                                    self, &out))
 
         return _pandas().Series(wrap_array_output(out), name=self.name)
 
@@ -254,9 +230,9 @@ cdef class Column:
 
         Returns
         -------
-        pyarrow.schema.DataType
+        pyarrow.DataType
         """
-        return box_data_type(self.column.type())
+        return wrap_data_type(self.column.type())
 
     @property
     def data(self):
@@ -265,7 +241,7 @@ cdef class Column:
 
         Returns
         -------
-        pyarrow.table.ChunkedArray
+        pyarrow.ChunkedArray
         """
         cdef ChunkedArray chunked_array = ChunkedArray()
         chunked_array.init(self.column.data())
@@ -396,7 +372,7 @@ cdef class RecordBatch:
 
         Returns
         -------
-        pyarrow.schema.Schema
+        pyarrow.Schema
         """
         cdef Schema schema
         self._check_nullptr()
@@ -408,7 +384,7 @@ cdef class RecordBatch:
         return self._schema
 
     def __getitem__(self, i):
-        return box_array(self.batch.column(i))
+        return wrap_array(self.batch.column(i))
 
     def slice(self, offset=0, length=None):
         """
@@ -436,7 +412,7 @@ cdef class RecordBatch:
         else:
             result = self.batch.Slice(offset, length)
 
-        return batch_from_cbatch(result)
+        return wrap_batch(result)
 
     def equals(self, RecordBatch other):
         cdef:
@@ -492,7 +468,7 @@ cdef class RecordBatch:
 
         Returns
         -------
-        pyarrow.table.RecordBatch
+        pyarrow.RecordBatch
         """
         names, arrays, metadata = _dataframe_to_arrays(df, False, schema)
         return cls.from_arrays(arrays, names, metadata)
@@ -511,7 +487,7 @@ cdef class RecordBatch:
 
         Returns
         -------
-        pyarrow.table.RecordBatch
+        pyarrow.RecordBatch
         """
         cdef:
             Array arr
@@ -534,7 +510,7 @@ cdef class RecordBatch:
             c_arrays.push_back(arr.sp_array)
 
         batch.reset(new CRecordBatch(schema, num_rows, c_arrays))
-        return batch_from_cbatch(batch)
+        return wrap_batch(batch)
 
 
 cdef table_to_blockmanager(const shared_ptr[CTable]& table, int nthreads):
@@ -548,8 +524,8 @@ cdef table_to_blockmanager(const shared_ptr[CTable]& table, int nthreads):
     from pyarrow.compat import DatetimeTZDtype
 
     with nogil:
-        check_status(pyarrow.ConvertTableToPandas(table, nthreads,
-                                                  &result_obj))
+        check_status(libarrow.ConvertTableToPandas(table, nthreads,
+                                                   &result_obj))
 
     result = PyObject_to_object(result_obj)
 
@@ -652,7 +628,7 @@ cdef class Table:
 
         Returns
         -------
-        pyarrow.table.Table
+        pyarrow.Table
 
         Examples
         --------
@@ -664,7 +640,7 @@ cdef class Table:
             ...     'str': ['a', 'b']
             ... })
         >>> pa.Table.from_pandas(df)
-        <pyarrow.table.Table object at 0x7f05d1fb1b40>
+        <pyarrow.lib.Table object at 0x7f05d1fb1b40>
         """
         names, arrays, metadata = _dataframe_to_arrays(df,
                                              timestamps_to_ms=timestamps_to_ms,
@@ -686,7 +662,7 @@ cdef class Table:
 
         Returns
         -------
-        pyarrow.table.Table
+        pyarrow.Table
 
         """
         cdef:
@@ -713,7 +689,7 @@ cdef class Table:
                 raise ValueError(type(arrays[i]))
 
         table.reset(new CTable(schema, columns))
-        return table_from_ctable(table)
+        return wrap_table(table)
 
     @staticmethod
     def from_batches(batches):
@@ -737,7 +713,7 @@ cdef class Table:
         with nogil:
             check_status(CTable.FromRecordBatches(c_batches, &c_table))
 
-        return table_from_ctable(c_table)
+        return wrap_table(c_table)
 
     def to_pandas(self, nthreads=None):
         """
@@ -782,9 +758,9 @@ cdef class Table:
 
         Returns
         -------
-        pyarrow.schema.Schema
+        pyarrow.Schema
         """
-        return box_schema(self.table.schema())
+        return wrap_schema(self.table.schema())
 
     def column(self, index):
         """
@@ -796,7 +772,7 @@ cdef class Table:
 
         Returns
         -------
-        pyarrow.table.Column
+        pyarrow.Column
         """
         self._check_nullptr()
         cdef Column column = Column()
@@ -863,7 +839,7 @@ cdef class Table:
         with nogil:
             check_status(self.table.AddColumn(i, column.sp_column, &c_table))
 
-        return table_from_ctable(c_table)
+        return wrap_table(c_table)
 
     def append_column(self, Column column):
         """
@@ -880,7 +856,7 @@ cdef class Table:
         with nogil:
             check_status(self.table.RemoveColumn(i, &c_table))
 
-        return table_from_ctable(c_table)
+        return wrap_table(c_table)
 
 
 def concat_tables(tables):
@@ -905,22 +881,4 @@ def concat_tables(tables):
     with nogil:
         check_status(ConcatenateTables(c_tables, &c_result))
 
-    return table_from_ctable(c_result)
-
-
-cdef object box_column(const shared_ptr[CColumn]& ccolumn):
-    cdef Column column = Column()
-    column.init(ccolumn)
-    return column
-
-
-cdef api object table_from_ctable(const shared_ptr[CTable]& ctable):
-    cdef Table table = Table()
-    table.init(ctable)
-    return table
-
-
-cdef api object batch_from_cbatch(const shared_ptr[CRecordBatch]& cbatch):
-    cdef RecordBatch batch = RecordBatch()
-    batch.init(cbatch)
-    return batch
+    return wrap_table(c_result)
diff --git a/python/setup.py b/python/setup.py
index 148224afb05..e592f3c924e 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -106,14 +106,10 @@ def initialize_options(self):
             os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0'))
 
     CYTHON_MODULE_NAMES = [
-        '_array',
+        'lib',
         '_config',
-        '_error',
-        '_io',
         '_jemalloc',
-        '_memory',
-        '_parquet',
-        '_table']
+        '_parquet']
 
     def _run_cmake(self):
         # The directory containing this setup.py

From ff1b5e519a1a607011478224adbd725d43b7bc49 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Fri, 12 May 2017 17:28:14 -0400
Subject: [PATCH 2/5] Rename things a bit

Change-Id: Ifc6b97f7bae539c7c072e6c4ddbba8b8dbd06bc5
---
 python/pyarrow/__init__.pxd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
index be2603ac9a5..b5ae1f44c85 100644
--- a/python/pyarrow/__init__.pxd
+++ b/python/pyarrow/__init__.pxd
@@ -20,9 +20,9 @@ from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField,
                                         CRecordBatch, CSchema,
                                         CTable, CTensor)
 
-cdef extern from "pyarrow.lib_api.h":
-    cdef int import_pyarrow__lib() except -1
 
+cdef extern from "arrow/python/pyarrow_api.h" namespace "arrow::py":
+    cdef int import_pyarrow() except -1
     cdef object wrap_data_type(const shared_ptr[CDataType]& type)
     cdef object wrap_field(const shared_ptr[CField]& field)
     cdef object wrap_schema(const shared_ptr[CSchema]& schema)

From b39d19cdf2e39b2bcb3e10cfaefd4eb21f5e62b2 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Fri, 12 May 2017 17:35:34 -0400
Subject: [PATCH 3/5] Fix test suite. Move _config into lib

Change-Id: I8eb4db35868420d374e03ebb9305e2884b3cac44
---
 python/CMakeLists.txt                |  1 -
 python/pyarrow/__init__.pxd          |  2 +-
 python/pyarrow/__init__.py           |  4 +---
 python/pyarrow/feather.py            |  6 ++---
 python/pyarrow/formatting.py         |  4 ++--
 python/pyarrow/includes/libarrow.pxd |  6 +++++
 python/pyarrow/lib.pyx               | 33 ++++++++++++++++++++++++++--
 python/pyarrow/table.pxi             |  2 +-
 python/pyarrow/tests/test_feather.py |  2 +-
 python/setup.py                      |  1 -
 10 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 44b5aaf7063..123dd5d8d7a 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -265,7 +265,6 @@ if (UNIX)
 endif()
 
 set(CYTHON_EXTENSIONS
-  _config
   lib
 )
 
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
index b5ae1f44c85..44305e01ca7 100644
--- a/python/pyarrow/__init__.pxd
+++ b/python/pyarrow/__init__.pxd
@@ -21,7 +21,7 @@ from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField,
                                         CTable, CTensor)
 
 
-cdef extern from "arrow/python/pyarrow_api.h" namespace "arrow::py":
+cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
     cdef int import_pyarrow() except -1
     cdef object wrap_data_type(const shared_ptr[CDataType]& type)
     cdef object wrap_field(const shared_ptr[CField]& field)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 7017103f93a..7d79811d988 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -25,9 +25,7 @@
    pass
 
 
-import pyarrow._config
-from pyarrow._config import cpu_count, set_cpu_count
-
+from pyarrow.lib import cpu_count, set_cpu_count
 from pyarrow.lib import (null, bool_,
                          int8, int16, int32, int64,
                          uint8, uint16, uint32, uint64,
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index c7b118e60a4..3754aec7372 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -22,9 +22,9 @@
 import pandas as pd
 
 from pyarrow.compat import pdapi
-from pyarrow._io import FeatherError  # noqa
-from pyarrow._table import Table
-import pyarrow._io as ext
+from pyarrow.lib import FeatherError  # noqa
+from pyarrow.lib import Table
+import pyarrow.lib as ext
 
 
 if LooseVersion(pd.__version__) < '0.17.0':
diff --git a/python/pyarrow/formatting.py b/python/pyarrow/formatting.py
index c3583448d6e..0af2873b293 100644
--- a/python/pyarrow/formatting.py
+++ b/python/pyarrow/formatting.py
@@ -17,7 +17,7 @@
 
 # Pretty-printing and other formatting utilities for Arrow data structures
 
-import pyarrow._array as _array
+import pyarrow.lib as lib
 
 
 def array_format(arr, window=None):
@@ -42,7 +42,7 @@ def array_format(arr, window=None):
 
 
 def value_format(x, indent_level=0):
-    if isinstance(x, _array.ListValue):
+    if isinstance(x, lib.ListValue):
         contents = ',\n'.join(value_format(item) for item in x)
         return '[{0}]'.format(_indent(contents, 1).strip())
     else:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9ed13b3e898..3d56c14bae2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -678,3 +678,9 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
 
     cdef cppclass PyBytesReader(CBufferReader):
         PyBytesReader(object fo)
+
+cdef extern from 'arrow/python/init.h':
+    int arrow_init_numpy() except -1
+
+cdef extern from 'arrow/python/config.h' namespace 'arrow::py':
+    void set_numpy_nan(object o)
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index b47630459e4..ae311aca8d0 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -30,14 +30,43 @@ import datetime
 import decimal as _pydecimal
 import numpy as np
 import six
-import pyarrow._config
 from pyarrow.compat import frombytes, tobytes, PandasSeries, Categorical
 
-
 cdef _pandas():
     import pandas as pd
     return pd
 
+
+arrow_init_numpy()
+
+import numpy as np
+set_numpy_nan(np.nan)
+
+import multiprocessing
+import os
+cdef int CPU_COUNT = int(
+    os.environ.get('OMP_NUM_THREADS',
+                   max(multiprocessing.cpu_count() // 2, 1)))
+
+
+def cpu_count():
+    """
+    Returns
+    -------
+    count : Number of CPUs to use by default in parallel operations. Default is
+      max(1, multiprocessing.cpu_count() / 2), but can be overridden by the
+      OMP_NUM_THREADS environment variable. For the default, we divide the CPU
+      count by 2 because most modern computers have hyperthreading turned on,
+      so doubling the CPU count beyond the number of physical cores does not
+      help.
+    """
+    return CPU_COUNT
+
+def set_cpu_count(count):
+    global CPU_COUNT
+    CPU_COUNT = max(int(count), 1)
+
+
 # Exception types
 include "error.pxi"
 
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index a78817b4aff..4ce0fb25b66 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -731,7 +731,7 @@ cdef class Table:
         pandas.DataFrame
         """
         if nthreads is None:
-            nthreads = pyarrow._config.cpu_count()
+            nthreads = cpu_count()
 
         mgr = table_to_blockmanager(self.sp_table, nthreads)
         return _pandas().DataFrame(mgr)
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 7a8abf486f4..69c32be5f3d 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -27,7 +27,7 @@
 from pyarrow.compat import guid
 from pyarrow.feather import (read_feather, write_feather,
                              FeatherReader)
-from pyarrow._io import FeatherWriter
+from pyarrow.lib import FeatherWriter
 
 
 def random_path():
diff --git a/python/setup.py b/python/setup.py
index e592f3c924e..b38fca47b23 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -107,7 +107,6 @@ def initialize_options(self):
 
     CYTHON_MODULE_NAMES = [
         'lib',
-        '_config',
         '_jemalloc',
         '_parquet']
 

From cff757debe34c1223d142122445847f798fff801 Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Fri, 12 May 2017 18:07:55 -0400
Subject: [PATCH 4/5] Expose pyarrow C API in arrow/python/pyarrow.h

Change-Id: I21e6a311273cda7eb793f8430581ba06b2393913
---
 cpp/CMakeLists.txt                  |   4 +-
 cpp/src/arrow/python/CMakeLists.txt |   2 +
 cpp/src/arrow/python/pyarrow.cc     |  75 +++++++++++++++
 cpp/src/arrow/python/pyarrow.h      |  55 +++++++++++
 cpp/src/arrow/python/pyarrow_api.h  | 143 ++++++++++++++++++++++++++++
 python/pyarrow/__init__.pxd         |   5 +-
 python/pyarrow/array.pxi            |  38 ++++----
 python/pyarrow/io.pxi               |  25 ++---
 python/pyarrow/public-api.pxi       |  26 +++--
 python/pyarrow/table.pxi            |  24 ++---
 10 files changed, 339 insertions(+), 58 deletions(-)
 create mode 100644 cpp/src/arrow/python/pyarrow.cc
 create mode 100644 cpp/src/arrow/python/pyarrow.h
 create mode 100644 cpp/src/arrow/python/pyarrow_api.h

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 21463794879..6b2ceec3277 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -852,7 +852,8 @@ if (UNIX)
     )
 
   FOREACH(item ${LINT_FILES})
-    IF(NOT (item MATCHES "_generated.h"))
+    IF(NOT ((item MATCHES "_generated.h") OR
+            (item MATCHES "pyarrow_api.h")))
       LIST(APPEND FILTERED_LINT_FILES ${item})
     ENDIF()
   ENDFOREACH(item ${LINT_FILES})
@@ -878,6 +879,7 @@ if (${CLANG_FORMAT_FOUND})
     `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h |
     sed -e '/_generated/g' |
     sed -e '/windows_compatibility.h/g' |
+    sed -e '/pyarrow_api.h/g' |
     sed -e '/config.h/g' |   # python/config.h
     sed -e '/platform.h/g'`  # python/platform.h
     )
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index c5cbc50845d..30852291d1a 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -50,6 +50,7 @@ set(ARROW_PYTHON_SRCS
   io.cc
   numpy_convert.cc
   pandas_convert.cc
+  pyarrow.cc
 )
 
 set(ARROW_PYTHON_SHARED_LINK_LIBS
@@ -90,6 +91,7 @@ install(FILES
   numpy_interop.h
   pandas_convert.h
   platform.h
+  pyarrow.h
   type_traits.h
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/python")
 
diff --git a/cpp/src/arrow/python/pyarrow.cc b/cpp/src/arrow/python/pyarrow.cc
new file mode 100644
index 00000000000..56c0381957f
--- /dev/null
+++ b/cpp/src/arrow/python/pyarrow.cc
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/pyarrow.h"
+
+#include <memory>
+
+#include "arrow/array.h"
+#include "arrow/table.h"
+#include "arrow/tensor.h"
+#include "arrow/type.h"
+
+namespace {
+#include "arrow/python/pyarrow_api.h"
+}
+
+namespace arrow {
+namespace py {
+
+int import_pyarrow() {
+  return ::import_pyarrow__lib();
+}
+
+PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer) {
+  return ::pyarrow_wrap_buffer(buffer);
+}
+
+PyObject* wrap_data_type(const std::shared_ptr<DataType>& type) {
+  return ::pyarrow_wrap_data_type(type);
+}
+
+PyObject* wrap_field(const std::shared_ptr<Field>& field) {
+  return ::pyarrow_wrap_field(field);
+}
+
+PyObject* wrap_schema(const std::shared_ptr<Schema>& schema) {
+  return ::pyarrow_wrap_schema(schema);
+}
+
+PyObject* wrap_array(const std::shared_ptr<Array>& array) {
+  return ::pyarrow_wrap_array(array);
+}
+
+PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor) {
+  return ::pyarrow_wrap_tensor(tensor);
+}
+
+PyObject* wrap_column(const std::shared_ptr<Column>& column) {
+  return ::pyarrow_wrap_column(column);
+}
+
+PyObject* wrap_table(const std::shared_ptr<Table>& table) {
+  return ::pyarrow_wrap_table(table);
+}
+
+PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch) {
+  return ::pyarrow_wrap_batch(batch);
+}
+
+}  // namespace py
+}  // namespace arrow
diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h
new file mode 100644
index 00000000000..7c618ce1925
--- /dev/null
+++ b/cpp/src/arrow/python/pyarrow.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_PYTHON_PYARROW_H
+#define ARROW_PYTHON_PYARROW_H
+
+#include "arrow/python/platform.h"
+
+#include <memory>
+
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class Buffer;
+class Column;
+class DataType;
+class Field;
+class RecordBatch;
+class Schema;
+class Table;
+class Tensor;
+
+namespace py {
+
+ARROW_EXPORT int import_pyarrow();
+ARROW_EXPORT PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer);
+ARROW_EXPORT PyObject* wrap_data_type(const std::shared_ptr<DataType>& type);
+ARROW_EXPORT PyObject* wrap_field(const std::shared_ptr<Field>& field);
+ARROW_EXPORT PyObject* wrap_schema(const std::shared_ptr<Schema>& schema);
+ARROW_EXPORT PyObject* wrap_array(const std::shared_ptr<Array>& array);
+ARROW_EXPORT PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor);
+ARROW_EXPORT PyObject* wrap_column(const std::shared_ptr<Column>& column);
+ARROW_EXPORT PyObject* wrap_table(const std::shared_ptr<Table>& table);
+ARROW_EXPORT PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch);
+
+}  // namespace py
+}  // namespace arrow
+
+#endif  // ARROW_PYTHON_PYARROW_H
diff --git a/cpp/src/arrow/python/pyarrow_api.h b/cpp/src/arrow/python/pyarrow_api.h
new file mode 100644
index 00000000000..7b708446610
--- /dev/null
+++ b/cpp/src/arrow/python/pyarrow_api.h
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// DO NOT EDIT THIS FILE. Update from pyarrow/lib_api.h after pyarrow build
+
+/* Generated by Cython 0.25.2 */
+
+#ifndef __PYX_HAVE_API__pyarrow__lib
+#define __PYX_HAVE_API__pyarrow__lib
+#include "Python.h"
+
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer>  const &) = 0;
+#define pyarrow_wrap_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type)(std::shared_ptr< arrow::DataType>  const &) = 0;
+#define pyarrow_wrap_data_type __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field)(std::shared_ptr< arrow::Field>  const &) = 0;
+#define pyarrow_wrap_field __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema)(std::shared_ptr< arrow::Schema>  const &) = 0;
+#define pyarrow_wrap_schema __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array)(std::shared_ptr< arrow::Array>  const &) = 0;
+#define pyarrow_wrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor)(std::shared_ptr< arrow::Tensor>  const &) = 0;
+#define pyarrow_wrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column)(std::shared_ptr< arrow::Column>  const &) = 0;
+#define pyarrow_wrap_column __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table)(std::shared_ptr< arrow::Table>  const &) = 0;
+#define pyarrow_wrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr< arrow::RecordBatch>  const &) = 0;
+#define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch
+#if !defined(__Pyx_PyIdentifier_FromString)
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
+#else
+  #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
+#endif
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportModule
+#define __PYX_HAVE_RT_ImportModule
+static PyObject *__Pyx_ImportModule(const char *name) {
+    PyObject *py_name = 0;
+    PyObject *py_module = 0;
+    py_name = __Pyx_PyIdentifier_FromString(name);
+    if (!py_name)
+        goto bad;
+    py_module = PyImport_Import(py_name);
+    Py_DECREF(py_name);
+    return py_module;
+bad:
+    Py_XDECREF(py_name);
+    return 0;
+}
+#endif
+
+#ifndef __PYX_HAVE_RT_ImportFunction
+#define __PYX_HAVE_RT_ImportFunction
+static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
+    PyObject *d = 0;
+    PyObject *cobj = 0;
+    union {
+        void (*fp)(void);
+        void *p;
+    } tmp;
+    d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
+    if (!d)
+        goto bad;
+    cobj = PyDict_GetItemString(d, funcname);
+    if (!cobj) {
+        PyErr_Format(PyExc_ImportError,
+            "%.200s does not export expected C function %.200s",
+                PyModule_GetName(module), funcname);
+        goto bad;
+    }
+#if PY_VERSION_HEX >= 0x02070000
+    if (!PyCapsule_IsValid(cobj, sig)) {
+        PyErr_Format(PyExc_TypeError,
+            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+             PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj));
+        goto bad;
+    }
+    tmp.p = PyCapsule_GetPointer(cobj, sig);
+#else
+    {const char *desc, *s1, *s2;
+    desc = (const char *)PyCObject_GetDesc(cobj);
+    if (!desc)
+        goto bad;
+    s1 = desc; s2 = sig;
+    while (*s1 != '\0' && *s1 == *s2) { s1++; s2++; }
+    if (*s1 != *s2) {
+        PyErr_Format(PyExc_TypeError,
+            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
+             PyModule_GetName(module), funcname, sig, desc);
+        goto bad;
+    }
+    tmp.p = PyCObject_AsVoidPtr(cobj);}
+#endif
+    *f = tmp.fp;
+    if (!(*f))
+        goto bad;
+    Py_DECREF(d);
+    return 0;
+bad:
+    Py_XDECREF(d);
+    return -1;
+}
+#endif
+
+
+static int import_pyarrow__lib(void) {
+  PyObject *module = 0;
+  module = __Pyx_ImportModule("pyarrow.lib");
+  if (!module) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_data_type, "PyObject *(std::shared_ptr< arrow::DataType>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_field, "PyObject *(std::shared_ptr< arrow::Field>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_schema, "PyObject *(std::shared_ptr< arrow::Schema>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_tensor, "PyObject *(std::shared_ptr< arrow::Tensor>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column, "PyObject *(std::shared_ptr< arrow::Column>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_table, "PyObject *(std::shared_ptr< arrow::Table>  const &)") < 0) goto bad;
+  if (__Pyx_ImportFunction(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch>  const &)") < 0) goto bad;
+  Py_DECREF(module); module = 0;
+  return 0;
+  bad:
+  Py_XDECREF(module);
+  return -1;
+}
+
+#endif /* !__PYX_HAVE_API__pyarrow__lib */
diff --git a/python/pyarrow/__init__.pxd b/python/pyarrow/__init__.pxd
index 44305e01ca7..4f434557915 100644
--- a/python/pyarrow/__init__.pxd
+++ b/python/pyarrow/__init__.pxd
@@ -16,13 +16,14 @@
 # under the License.
 
 from libcpp.memory cimport shared_ptr
-from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField,
-                                        CRecordBatch, CSchema,
+from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, CDataType,
+                                        CField, CRecordBatch, CSchema,
                                         CTable, CTensor)
 
 
 cdef extern from "arrow/python/pyarrow.h" namespace "arrow::py":
     cdef int import_pyarrow() except -1
+    cdef object wrap_buffer(const shared_ptr[CBuffer]& buffer)
     cdef object wrap_data_type(const shared_ptr[CDataType]& type)
     cdef object wrap_field(const shared_ptr[CField]& field)
     cdef object wrap_schema(const shared_ptr[CSchema]& schema)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index a115cfdb36c..46e94b4f4b3 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -162,7 +162,7 @@ cdef class Field:
     cdef init(self, const shared_ptr[CField]& field):
         self.sp_field = field
         self.field = field.get()
-        self.type = wrap_data_type(field.get().type())
+        self.type = pyarrow_wrap_data_type(field.get().type())
 
     def equals(self, Field other):
         """
@@ -220,7 +220,7 @@ cdef class Field:
         with nogil:
             check_status(self.field.AddMetadata(c_meta, &new_field))
 
-        return wrap_field(new_field)
+        return pyarrow_wrap_field(new_field)
 
     def remove_metadata(self):
         """
@@ -233,7 +233,7 @@ cdef class Field:
         cdef shared_ptr[CField] new_field
         with nogil:
             new_field = self.field.RemoveMetadata()
-        return wrap_field(new_field)
+        return pyarrow_wrap_field(new_field)
 
 
 cdef class Schema:
@@ -250,7 +250,7 @@ cdef class Schema:
 
         cdef Field result = Field()
         result.init(self.schema.field(i))
-        result.type = wrap_data_type(result.field.type())
+        result.type = pyarrow_wrap_data_type(result.field.type())
 
         return result
 
@@ -298,7 +298,7 @@ cdef class Schema:
         -------
         field: pyarrow.Field
         """
-        return wrap_field(self.schema.GetFieldByName(tobytes(name)))
+        return pyarrow_wrap_field(self.schema.GetFieldByName(tobytes(name)))
 
     def add_metadata(self, dict metadata):
         """
@@ -320,7 +320,7 @@ cdef class Schema:
         with nogil:
             check_status(self.schema.AddMetadata(c_meta, &new_schema))
 
-        return wrap_schema(new_schema)
+        return pyarrow_wrap_schema(new_schema)
 
     def remove_metadata(self):
         """
@@ -333,7 +333,7 @@ cdef class Schema:
         cdef shared_ptr[CSchema] new_schema
         with nogil:
             new_schema = self.schema.RemoveMetadata()
-        return wrap_schema(new_schema)
+        return pyarrow_wrap_schema(new_schema)
 
     def __str__(self):
         return frombytes(self.schema.ToString())
@@ -580,7 +580,7 @@ def float64():
 cpdef DataType decimal(int precision, int scale=0):
     cdef shared_ptr[CDataType] decimal_type
     decimal_type.reset(new CDecimalType(precision, scale))
-    return wrap_data_type(decimal_type)
+    return pyarrow_wrap_data_type(decimal_type)
 
 
 def string():
@@ -605,7 +605,7 @@ def binary(int length=-1):
 
     cdef shared_ptr[CDataType] fixed_size_binary_type
     fixed_size_binary_type.reset(new CFixedSizeBinaryType(length))
-    return wrap_data_type(fixed_size_binary_type)
+    return pyarrow_wrap_data_type(fixed_size_binary_type)
 
 
 def list_(DataType value_type):
@@ -679,7 +679,7 @@ def from_numpy_dtype(object dtype):
     with nogil:
         check_status(NumPyDtypeToArrow(dtype, &c_type))
 
-    return wrap_data_type(c_type)
+    return pyarrow_wrap_data_type(c_type)
 
 
 NA = None
@@ -902,7 +902,7 @@ cdef class ListValue(ArrayValue):
     cdef void _set_array(self, const shared_ptr[CArray]& sp_array):
         self.sp_array = sp_array
         self.ap = <CListArray*> sp_array.get()
-        self.value_type = wrap_data_type(self.ap.value_type())
+        self.value_type = pyarrow_wrap_data_type(self.ap.value_type())
 
     cdef getitem(self, int64_t i):
         cdef int64_t j = self.ap.value_offset(self.index) + i
@@ -1026,7 +1026,7 @@ def array(object sequence, DataType type=None, MemoryPool memory_pool=None):
             )
         )
 
-    return wrap_array(sp_array)
+    return pyarrow_wrap_array(sp_array)
 
 
 
@@ -1035,7 +1035,7 @@ cdef class Array:
     cdef init(self, const shared_ptr[CArray]& sp_array):
         self.sp_array = sp_array
         self.ap = sp_array.get()
-        self.type = wrap_data_type(self.sp_array.get().type())
+        self.type = pyarrow_wrap_data_type(self.sp_array.get().type())
 
     @staticmethod
     def from_pandas(obj, mask=None, DataType type=None,
@@ -1129,7 +1129,7 @@ cdef class Array:
                 check_status(PandasToArrow(
                     pool, values, mask, c_type, &out))
 
-        return wrap_array(out)
+        return pyarrow_wrap_array(out)
 
     property null_count:
 
@@ -1213,7 +1213,7 @@ cdef class Array:
         else:
             result = self.ap.Slice(offset, length)
 
-        return wrap_array(result)
+        return pyarrow_wrap_array(result)
 
     def to_pandas(self):
         """
@@ -1244,7 +1244,7 @@ cdef class Tensor:
     cdef init(self, const shared_ptr[CTensor]& sp_tensor):
         self.sp_tensor = sp_tensor
         self.tp = sp_tensor.get()
-        self.type = wrap_data_type(self.tp.type())
+        self.type = pyarrow_wrap_data_type(self.tp.type())
 
     def __repr__(self):
         return """<pyarrow.Tensor>
@@ -1256,7 +1256,7 @@ strides: {2}""".format(self.type, self.shape, self.strides)
     def from_numpy(obj):
         cdef shared_ptr[CTensor] ctensor
         check_status(NdarrayToTensor(c_default_memory_pool(), obj, &ctensor))
-        return wrap_tensor(ctensor)
+        return pyarrow_wrap_tensor(ctensor)
 
     def to_numpy(self):
         """
@@ -1442,7 +1442,7 @@ cdef class DictionaryArray(Array):
             cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
 
             if self._dictionary is None:
-                self._dictionary = wrap_array(darr.dictionary())
+                self._dictionary = pyarrow_wrap_array(darr.dictionary())
 
             return self._dictionary
 
@@ -1452,7 +1452,7 @@ cdef class DictionaryArray(Array):
             cdef CDictionaryArray* darr = <CDictionaryArray*>(self.ap)
 
             if self._indices is None:
-                self._indices = wrap_array(darr.indices())
+                self._indices = pyarrow_wrap_array(darr.indices())
 
             return self._indices
 
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 13e8b638723..a0a96e72864 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -163,7 +163,7 @@ cdef class NativeFile:
         with nogil:
             check_status(self.rd_file.get().ReadB(c_nbytes, &output))
 
-        return wrap_buffer(output)
+        return pyarrow_wrap_buffer(output)
 
     def download(self, stream_or_path, buffer_size=None):
         """
@@ -483,7 +483,7 @@ cdef class Buffer:
             if parent_buf.get() == NULL:
                 return None
             else:
-                return wrap_buffer(parent_buf)
+                return pyarrow_wrap_buffer(parent_buf)
 
     def __getitem__(self, key):
         # TODO(wesm): buffer slicing
@@ -531,7 +531,7 @@ cdef class InMemoryOutputStream(NativeFile):
     def get_result(self):
         check_status(self.wr_file.get().Close())
         self.is_open = False
-        return wrap_buffer(<shared_ptr[CBuffer]> self.buffer)
+        return pyarrow_wrap_buffer(<shared_ptr[CBuffer]> self.buffer)
 
 
 cdef class BufferReader(NativeFile):
@@ -566,18 +566,11 @@ def frombuffer(object obj):
     try:
         memoryview(obj)
         buf.reset(new PyBuffer(obj))
-        return wrap_buffer(buf)
+        return pyarrow_wrap_buffer(buf)
     except TypeError:
         raise ValueError('Must pass object that implements buffer protocol')
 
 
-
-cdef Buffer wrap_buffer(const shared_ptr[CBuffer]& buf):
-    cdef Buffer result = Buffer()
-    result.init(buf)
-    return result
-
-
 cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
     cdef NativeFile nf
 
@@ -993,7 +986,7 @@ cdef class _StreamReader:
         if batch.get() == NULL:
             raise StopIteration
 
-        return wrap_batch(batch)
+        return pyarrow_wrap_batch(batch)
 
     def read_all(self):
         """
@@ -1013,7 +1006,7 @@ cdef class _StreamReader:
 
             check_status(CTable.FromRecordBatches(batches, &table))
 
-        return wrap_table(table)
+        return pyarrow_wrap_table(table)
 
 
 cdef class _FileWriter(_StreamWriter):
@@ -1066,7 +1059,7 @@ cdef class _FileReader:
         with nogil:
             check_status(self.reader.get().GetRecordBatch(i, &batch))
 
-        return wrap_batch(batch)
+        return pyarrow_wrap_batch(batch)
 
     # TODO(wesm): ARROW-503: Function was renamed. Remove after a period of
     # time has passed
@@ -1089,7 +1082,7 @@ cdef class _FileReader:
                 check_status(self.reader.get().GetRecordBatch(i, &batches[i]))
             check_status(CTable.FromRecordBatches(batches, &table))
 
-        return wrap_table(table)
+        return pyarrow_wrap_table(table)
 
 
 #----------------------------------------------------------------------
@@ -1257,4 +1250,4 @@ def read_tensor(NativeFile source):
     with nogil:
         check_status(ReadTensor(offset, source.rd_file.get(), &sp_tensor))
 
-    return wrap_tensor(sp_tensor)
+    return pyarrow_wrap_tensor(sp_tensor)
diff --git a/python/pyarrow/public-api.pxi b/python/pyarrow/public-api.pxi
index 32b27ac93a0..7b556512433 100644
--- a/python/pyarrow/public-api.pxi
+++ b/python/pyarrow/public-api.pxi
@@ -20,7 +20,15 @@ from pyarrow.includes.libarrow cimport (CArray, CColumn, CDataType, CField,
                                         CRecordBatch, CSchema,
                                         CTable, CTensor)
 
-cdef public api object wrap_data_type(const shared_ptr[CDataType]& type):
+
+cdef public api object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf):
+    cdef Buffer result = Buffer()
+    result.init(buf)
+    return result
+
+
+cdef public api object pyarrow_wrap_data_type(
+    const shared_ptr[CDataType]& type):
     cdef:
         DataType out
 
@@ -42,7 +50,7 @@ cdef public api object wrap_data_type(const shared_ptr[CDataType]& type):
     return out
 
 
-cdef public api object wrap_field(const shared_ptr[CField]& field):
+cdef public api object pyarrow_wrap_field(const shared_ptr[CField]& field):
     if field.get() == NULL:
         return None
     cdef Field out = Field()
@@ -50,13 +58,13 @@ cdef public api object wrap_field(const shared_ptr[CField]& field):
     return out
 
 
-cdef public api object wrap_schema(const shared_ptr[CSchema]& type):
+cdef public api object pyarrow_wrap_schema(const shared_ptr[CSchema]& type):
     cdef Schema out = Schema()
     out.init_schema(type)
     return out
 
 
-cdef public api object wrap_array(const shared_ptr[CArray]& sp_array):
+cdef public api object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array):
     if sp_array.get() == NULL:
         raise ValueError('Array was NULL')
 
@@ -70,7 +78,8 @@ cdef public api object wrap_array(const shared_ptr[CArray]& sp_array):
     return arr
 
 
-cdef public api object wrap_tensor(const shared_ptr[CTensor]& sp_tensor):
+cdef public api object pyarrow_wrap_tensor(
+    const shared_ptr[CTensor]& sp_tensor):
     if sp_tensor.get() == NULL:
         raise ValueError('Tensor was NULL')
 
@@ -79,19 +88,20 @@ cdef public api object wrap_tensor(const shared_ptr[CTensor]& sp_tensor):
     return tensor
 
 
-cdef public api object wrap_column(const shared_ptr[CColumn]& ccolumn):
+cdef public api object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn):
     cdef Column column = Column()
     column.init(ccolumn)
     return column
 
 
-cdef public api object wrap_table(const shared_ptr[CTable]& ctable):
+cdef public api object pyarrow_wrap_table(const shared_ptr[CTable]& ctable):
     cdef Table table = Table()
     table.init(ctable)
     return table
 
 
-cdef public api object wrap_batch(const shared_ptr[CRecordBatch]& cbatch):
+cdef public api object pyarrow_wrap_batch(
+    const shared_ptr[CRecordBatch]& cbatch):
     cdef RecordBatch batch = RecordBatch()
     batch.init(cbatch)
     return batch
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 4ce0fb25b66..8dd18cf4136 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -83,7 +83,7 @@ cdef class ChunkedArray:
         pyarrow.Array
         """
         self._check_nullptr()
-        return wrap_array(self.chunked_array.chunk(i))
+        return pyarrow_wrap_array(self.chunked_array.chunk(i))
 
     def iterchunks(self):
         for i in range(self.num_chunks):
@@ -126,7 +126,7 @@ cdef class Column:
 
         cdef shared_ptr[CColumn] sp_column
         sp_column.reset(new CColumn(boxed_field.sp_field, arr.sp_array))
-        return wrap_column(sp_column)
+        return pyarrow_wrap_column(sp_column)
 
     def to_pandas(self):
         """
@@ -232,7 +232,7 @@ cdef class Column:
         -------
         pyarrow.DataType
         """
-        return wrap_data_type(self.column.type())
+        return pyarrow_wrap_data_type(self.column.type())
 
     @property
     def data(self):
@@ -384,7 +384,7 @@ cdef class RecordBatch:
         return self._schema
 
     def __getitem__(self, i):
-        return wrap_array(self.batch.column(i))
+        return pyarrow_wrap_array(self.batch.column(i))
 
     def slice(self, offset=0, length=None):
         """
@@ -412,7 +412,7 @@ cdef class RecordBatch:
         else:
             result = self.batch.Slice(offset, length)
 
-        return wrap_batch(result)
+        return pyarrow_wrap_batch(result)
 
     def equals(self, RecordBatch other):
         cdef:
@@ -510,7 +510,7 @@ cdef class RecordBatch:
             c_arrays.push_back(arr.sp_array)
 
         batch.reset(new CRecordBatch(schema, num_rows, c_arrays))
-        return wrap_batch(batch)
+        return pyarrow_wrap_batch(batch)
 
 
 cdef table_to_blockmanager(const shared_ptr[CTable]& table, int nthreads):
@@ -689,7 +689,7 @@ cdef class Table:
                 raise ValueError(type(arrays[i]))
 
         table.reset(new CTable(schema, columns))
-        return wrap_table(table)
+        return pyarrow_wrap_table(table)
 
     @staticmethod
     def from_batches(batches):
@@ -713,7 +713,7 @@ cdef class Table:
         with nogil:
             check_status(CTable.FromRecordBatches(c_batches, &c_table))
 
-        return wrap_table(c_table)
+        return pyarrow_wrap_table(c_table)
 
     def to_pandas(self, nthreads=None):
         """
@@ -760,7 +760,7 @@ cdef class Table:
         -------
         pyarrow.Schema
         """
-        return wrap_schema(self.table.schema())
+        return pyarrow_wrap_schema(self.table.schema())
 
     def column(self, index):
         """
@@ -839,7 +839,7 @@ cdef class Table:
         with nogil:
             check_status(self.table.AddColumn(i, column.sp_column, &c_table))
 
-        return wrap_table(c_table)
+        return pyarrow_wrap_table(c_table)
 
     def append_column(self, Column column):
         """
@@ -856,7 +856,7 @@ cdef class Table:
         with nogil:
             check_status(self.table.RemoveColumn(i, &c_table))
 
-        return wrap_table(c_table)
+        return pyarrow_wrap_table(c_table)
 
 
 def concat_tables(tables):
@@ -881,4 +881,4 @@ def concat_tables(tables):
     with nogil:
         check_status(ConcatenateTables(c_tables, &c_result))
 
-    return wrap_table(c_result)
+    return pyarrow_wrap_table(c_result)

From 9e6ee24673e9fcb1f0410aa2eb5e04d1fb0b11cd Mon Sep 17 00:00:00 2001
From: Wes McKinney <wes.mckinney@twosigma.com>
Date: Sat, 13 May 2017 15:19:00 -0400
Subject: [PATCH 5/5] Fix up optional extensions

Change-Id: Icad57e6d5e9ee5302e8623664c3c58ac363bdd69
---
 python/pyarrow/_jemalloc.pyx |  2 +-
 python/pyarrow/_parquet.pyx  | 21 +++++++++++----------
 python/pyarrow/lib.pxd       | 10 ++++++++++
 python/pyarrow/parquet.py    | 13 ++++++-------
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/python/pyarrow/_jemalloc.pyx b/python/pyarrow/_jemalloc.pyx
index 3b41964a39c..6f00c9d2dde 100644
--- a/python/pyarrow/_jemalloc.pyx
+++ b/python/pyarrow/_jemalloc.pyx
@@ -20,7 +20,7 @@
 # cython: embedsignature = True
 
 from pyarrow.includes.libarrow_jemalloc cimport CJemallocMemoryPool
-from pyarrow._memory cimport MemoryPool
+from pyarrow.lib cimport MemoryPool
 
 def default_pool():
     cdef MemoryPool pool = MemoryPool()
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index b0a029e6960..51bd938c79a 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -22,15 +22,16 @@
 from cython.operator cimport dereference as deref
 from pyarrow.includes.common cimport *
 from pyarrow.includes.libarrow cimport *
-from pyarrow._array cimport Array, Schema, box_schema
-from pyarrow._error cimport check_status
-from pyarrow._memory cimport MemoryPool, maybe_unbox_memory_pool
-from pyarrow._table cimport Table, table_from_ctable
-from pyarrow._io cimport NativeFile, get_reader, get_writer
+from pyarrow.lib cimport (Array, Schema,
+                          check_status,
+                          MemoryPool, maybe_unbox_memory_pool,
+                          Table,
+                          pyarrow_wrap_schema,
+                          pyarrow_wrap_table,
+                          NativeFile, get_reader, get_writer)
 
 from pyarrow.compat import tobytes, frombytes
-from pyarrow._error import ArrowException
-from pyarrow._io import NativeFile
+from pyarrow.lib import ArrowException, NativeFile
 
 import six
 
@@ -212,7 +213,7 @@ cdef class ParquetSchema:
         with nogil:
             check_status(FromParquetSchema(self.schema, &sp_arrow_schema))
 
-        return box_schema(sp_arrow_schema)
+        return pyarrow_wrap_schema(sp_arrow_schema)
 
     def equals(self, ParquetSchema other):
         """
@@ -425,7 +426,7 @@ cdef class ParquetReader:
             with nogil:
                 check_status(self.reader.get()
                              .ReadRowGroup(i, &ctable))
-        return table_from_ctable(ctable)
+        return pyarrow_wrap_table(ctable)
 
     def read_all(self, column_indices=None):
         cdef:
@@ -444,7 +445,7 @@ cdef class ParquetReader:
             with nogil:
                 check_status(self.reader.get()
                              .ReadTable(&ctable))
-        return table_from_ctable(ctable)
+        return pyarrow_wrap_table(ctable)
 
     def column_name_idx(self, column_name):
         """
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 08d4a5d0e70..d3d03aaaefa 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -325,3 +325,13 @@ cdef class NativeFile:
 
 cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader)
 cdef get_writer(object source, shared_ptr[OutputStream]* writer)
+
+cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
+cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
+cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
+cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
+cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
+cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
+cdef public object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn)
+cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
+cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 21359f137f2..050ec3176d7 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -24,8 +24,7 @@
                               RowGroupMetaData, ParquetSchema,
                               ParquetWriter)
 import pyarrow._parquet as _parquet  # noqa
-import pyarrow._array as _array
-import pyarrow._table as _table
+import pyarrow.lib as lib
 
 
 # ----------------------------------------------------------------------
@@ -241,8 +240,8 @@ def read(self, columns=None, nthreads=1, partitions=None,
                 # manifest, so ['a', 'b', 'c'] as in our example above.
                 dictionary = partitions.levels[i].dictionary
 
-                arr = _array.DictionaryArray.from_arrays(indices, dictionary)
-                col = _table.Column.from_array(name, arr)
+                arr = lib.DictionaryArray.from_arrays(indices, dictionary)
+                col = lib.Column.from_array(name, arr)
                 table = table.append_column(col)
 
         return table
@@ -298,9 +297,9 @@ def dictionary(self):
         # Only integer and string partition types are supported right now
         try:
             integer_keys = [int(x) for x in self.keys]
-            dictionary = _array.array(integer_keys)
+            dictionary = lib.array(integer_keys)
         except ValueError:
-            dictionary = _array.array(self.keys)
+            dictionary = lib.array(self.keys)
 
         self._dictionary = dictionary
         return dictionary
@@ -539,7 +538,7 @@ def read(self, columns=None, nthreads=1):
                                open_file_func=open_file)
             tables.append(table)
 
-        all_data = _table.concat_tables(tables)
+        all_data = lib.concat_tables(tables)
         return all_data
 
     def _get_open_file_func(self):