From e8f84a93b76bbb872654efcc19dd069f73b97cdb Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Wed, 30 Mar 2016 18:53:20 +0200 Subject: [PATCH 1/3] ARROW-49: [Python] Add Column and Table wrapper interface --- python/pyarrow/array.pyx | 99 ++++++++++++++++++++++++++++- python/pyarrow/tests/test_column.py | 49 ++++++++++++++ python/pyarrow/tests/test_table.py | 34 ++++++++++ 3 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 python/pyarrow/tests/test_column.py diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 255efc268fe..8593fce692c 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -287,6 +287,66 @@ cdef class RowBatch: return self.arrays[i] +cdef class Column: + ''' + Do not call this class's constructor directly. + ''' + cdef: + shared_ptr[CColumn] sp_column + CColumn* column + + def __cinit__(self): + self.column = NULL + + cdef init(self, const shared_ptr[CColumn]& column): + self.sp_column = column + self.column = column.get() + + def to_pandas(self): + """ + Convert the arrow::Column to a pandas Series + """ + cdef: + PyObject* arr + + import pandas as pd + + check_status(pyarrow.ArrowToPandas(self.sp_column, &arr)) + return pd.Series(arr, name=self.name()) + + def _check_nullptr(self): + if self.column == NULL: + raise ReferenceError("Column object references a NULL pointer." 
+ "Not initialized.") + + def __len__(self): + self._check_nullptr() + return self.column.length() + + def length(self): + self._check_nullptr() + return self.column.length() + + property shape: + + def __get__(self): + self._check_nullptr() + return (self.length(),) + + def null_count(self): + self._check_nullptr() + return self.column.null_count() + + def name(self): + return frombytes(self.column.name()) + + def type(self): + raise NotImplementedError("Type information not yet implemented") + + def data(self): + raise NotImplementedError("No python wrapper for ChunkedArray yet") + + cdef class Table: ''' Do not call this class's constructor directly. @@ -296,12 +356,17 @@ cdef class Table: CTable* table def __cinit__(self): - pass + self.table = NULL cdef init(self, const shared_ptr[CTable]& table): self.sp_table = table self.table = table.get() + def _check_nullptr(self): + if self.table == NULL: + raise ReferenceError("Table object references a NULL pointer." + "Not initialized.") + @staticmethod def from_pandas(df, name=None): pass @@ -360,3 +425,35 @@ cdef class Table: data.append( arr) return pd.DataFrame(dict(zip(names, data)), columns=names) + + def name(self): + self._check_nullptr() + return frombytes(self.table.name()) + + def schema(self): + raise NotImplementedError() + + def column(self, index_or_name): + self._check_nullptr() + index = index_or_name + # TODO(uwe): Implement retrevial by name + cdef Column column = Column() + column.init(self.table.column(index)) + return column + + def columns(self): + for i in range(self.num_columns()): + yield self.column(i) + + def num_columns(self): + self._check_nullptr() + return self.table.num_columns() + + def num_rows(self): + self._check_nullptr() + return self.table.num_rows() + + property shape: + + def __get__(self): + return (self.num_rows(), self.num_columns()) diff --git a/python/pyarrow/tests/test_column.py b/python/pyarrow/tests/test_column.py new file mode 100644 index 00000000000..fa862550829 
--- /dev/null +++ b/python/pyarrow/tests/test_column.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyarrow.compat import unittest +import pyarrow as arrow + +A = arrow + +import pandas as pd + + +class TestColumn(unittest.TestCase): + + def test_basics(self): + data = [ + A.from_pylist([-10, -5, 0, 5, 10]) + ] + table = A.Table.from_arrays(('a'), data, 'table_name') + column = table.column(0) + assert column.name() == 'a' + assert column.length() == 5 + assert len(column) == 5 + assert column.shape == (5,) + + def test_pandas(self): + data = [ + A.from_pylist([-10, -5, 0, 5, 10]) + ] + table = A.Table.from_arrays(('a'), data, 'table_name') + column = table.column(0) + series = column.to_pandas() + assert series.name == 'a' + assert series.shape == (5,) + assert series.iloc[0] == -10 + diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 2e24445bd0c..797a3a75b54 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -20,6 +20,8 @@ A = arrow +import pandas as pd + class TestRowBatch(unittest.TestCase): @@ -38,3 +40,35 @@ def test_basics(self): assert len(batch) == num_rows assert batch.num_rows == num_rows assert batch.num_columns 
== len(data) + + +class TestTable(unittest.TestCase): + + def test_basics(self): + data = [ + A.from_pylist(range(5)), + A.from_pylist([-10, -5, 0, 5, 10]) + ] + table = A.Table.from_arrays(('a', 'b'), data, 'table_name') + assert table.name() == 'table_name' + assert table.num_rows() == 5 + assert table.num_columns() == 2 + assert table.shape == (5, 2) + + def test_pandas(self): + data = [ + A.from_pylist(range(5)), + A.from_pylist([-10, -5, 0, 5, 10]) + ] + table = A.Table.from_arrays(('a', 'b'), data, 'table_name') + + # TODO: Use this part once from_pandas is implemented + # data = {'a': range(5), 'b': [-10, -5, 0, 5, 10]} + # df = pd.DataFrame(data) + # A.Table.from_pandas(df) + + df = table.to_pandas() + assert set(df.columns) == set(('a', 'b')) + assert df.shape == (5, 2) + assert df.ix[0, 'b'] == -10 + From e422fafc66bbbb9a33963b4b643a9cd948174792 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 31 Mar 2016 22:58:25 +0200 Subject: [PATCH 2/3] Incorporate PR feedback, Add ChunkedArray interface --- python/CMakeLists.txt | 1 + python/pyarrow/__init__.py | 4 +- python/pyarrow/array.pxd | 2 + python/pyarrow/array.pyx | 172 +---------------- python/pyarrow/includes/libarrow.pxd | 5 +- python/pyarrow/schema.pxd | 2 + python/pyarrow/schema.pyx | 9 + python/pyarrow/table.pxd | 46 +++++ python/pyarrow/table.pyx | 264 +++++++++++++++++++++++++++ python/pyarrow/tests/test_column.py | 2 +- python/pyarrow/tests/test_table.py | 11 +- python/setup.py | 2 +- 12 files changed, 343 insertions(+), 177 deletions(-) create mode 100644 python/pyarrow/table.pxd create mode 100644 python/pyarrow/table.pyx diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ebe825f65c4..2173232d4ef 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -444,6 +444,7 @@ set(CYTHON_EXTENSIONS error scalar schema + table ) foreach(module ${CYTHON_EXTENSIONS}) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index c343f5ba5f1..40a09c2feae 100644 ---
a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -41,4 +41,6 @@ list_, struct, field, DataType, Field, Schema, schema) -from pyarrow.array import RowBatch, Table, from_pandas_dataframe +from pyarrow.array import RowBatch, from_pandas_dataframe + +from pyarrow.table import Column, Table diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd index de3c7741962..8cd15cd4502 100644 --- a/python/pyarrow/array.pxd +++ b/python/pyarrow/array.pxd @@ -36,6 +36,8 @@ cdef class Array: cdef init(self, const shared_ptr[CArray]& sp_array) cdef getitem(self, int i) +cdef object box_arrow_array(const shared_ptr[CArray]& sp_array) + cdef class BooleanArray(Array): pass diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 8593fce692c..456bf6d1da8 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -33,6 +33,8 @@ from pyarrow.scalar import NA from pyarrow.schema cimport Schema import pyarrow.schema as schema +from pyarrow.table cimport Table + def total_allocated_bytes(): cdef MemoryPool* pool = pyarrow.GetMemoryPool() return pool.bytes_allocated() @@ -287,173 +289,3 @@ cdef class RowBatch: return self.arrays[i] -cdef class Column: - ''' - Do not call this class's constructor directly. - ''' - cdef: - shared_ptr[CColumn] sp_column - CColumn* column - - def __cinit__(self): - self.column = NULL - - cdef init(self, const shared_ptr[CColumn]& column): - self.sp_column = column - self.column = column.get() - - def to_pandas(self): - """ - Convert the arrow::Column to a pandas Series - """ - cdef: - PyObject* arr - - import pandas as pd - - check_status(pyarrow.ArrowToPandas(self.sp_column, &arr)) - return pd.Series(arr, name=self.name()) - - def _check_nullptr(self): - if self.column == NULL: - raise ReferenceError("Column object references a NULL pointer." 
- "Not initialized.") - - def __len__(self): - self._check_nullptr() - return self.column.length() - - def length(self): - self._check_nullptr() - return self.column.length() - - property shape: - - def __get__(self): - self._check_nullptr() - return (self.length(),) - - def null_count(self): - self._check_nullptr() - return self.column.null_count() - - def name(self): - return frombytes(self.column.name()) - - def type(self): - raise NotImplementedError("Type information not yet implemented") - - def data(self): - raise NotImplementedError("No python wrapper for ChunkedArray yet") - - -cdef class Table: - ''' - Do not call this class's constructor directly. - ''' - cdef: - shared_ptr[CTable] sp_table - CTable* table - - def __cinit__(self): - self.table = NULL - - cdef init(self, const shared_ptr[CTable]& table): - self.sp_table = table - self.table = table.get() - - def _check_nullptr(self): - if self.table == NULL: - raise ReferenceError("Table object references a NULL pointer." - "Not initialized.") - - @staticmethod - def from_pandas(df, name=None): - pass - - @staticmethod - def from_arrays(names, arrays, name=None): - cdef: - Array arr - Table result - c_string c_name - vector[shared_ptr[CField]] fields - vector[shared_ptr[CColumn]] columns - shared_ptr[CSchema] schema - shared_ptr[CTable] table - - cdef int K = len(arrays) - - fields.resize(K) - columns.resize(K) - for i in range(K): - arr = arrays[i] - c_name = tobytes(names[i]) - - fields[i].reset(new CField(c_name, arr.type.sp_type, True)) - columns[i].reset(new CColumn(fields[i], arr.sp_array)) - - if name is None: - c_name = '' - else: - c_name = tobytes(name) - - schema.reset(new CSchema(fields)) - table.reset(new CTable(c_name, schema, columns)) - - result = Table() - result.init(table) - - return result - - def to_pandas(self): - """ - Convert the arrow::Table to a pandas DataFrame - """ - cdef: - PyObject* arr - shared_ptr[CColumn] col - - import pandas as pd - - names = [] - data = [] - for i in 
range(self.table.num_columns()): - col = self.table.column(i) - check_status(pyarrow.ArrowToPandas(col, &arr)) - names.append(frombytes(col.get().name())) - data.append( arr) - - return pd.DataFrame(dict(zip(names, data)), columns=names) - - def name(self): - self._check_nullptr() - return frombytes(self.table.name()) - - def schema(self): - raise NotImplementedError() - - def column(self, index_or_name): - self._check_nullptr() - index = index_or_name - # TODO(uwe): Implement retrevial by name - cdef Column column = Column() - column.init(self.table.column(index)) - return column - - def columns(self): - for i in range(self.num_columns()): - yield self.column(i) - - def num_columns(self): - self._check_nullptr() - return self.table.num_columns() - - def num_rows(self): - self._check_nullptr() - return self.table.num_rows() - - property shape: - - def __get__(self): - return (self.num_rows(), self.num_columns()) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 42f1f25073d..b2ef45a347b 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -149,7 +149,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_string GetString(int i) cdef cppclass CChunkedArray" arrow::ChunkedArray": - pass + int64_t length() + int64_t null_count() + int num_chunks() + const shared_ptr[CArray]& chunk(int i) cdef cppclass CColumn" arrow::Column": CColumn(const shared_ptr[CField]& field, diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd index 61458b765c7..f2cb776eb2e 100644 --- a/python/pyarrow/schema.pxd +++ b/python/pyarrow/schema.pxd @@ -41,5 +41,7 @@ cdef class Schema: CSchema* schema cdef init(self, const vector[shared_ptr[CField]]& fields) + cdef init_schema(self, const shared_ptr[CSchema]& schema) cdef DataType box_data_type(const shared_ptr[CDataType]& type) +cdef Schema box_schema(const shared_ptr[CSchema]& schema) diff --git a/python/pyarrow/schema.pyx 
b/python/pyarrow/schema.pyx index b3bf02aad76..22ddf0cf17e 100644 --- a/python/pyarrow/schema.pyx +++ b/python/pyarrow/schema.pyx @@ -106,6 +106,10 @@ cdef class Schema: self.schema = new CSchema(fields) self.sp_schema.reset(self.schema) + cdef init_schema(self, const shared_ptr[CSchema]& schema): + self.schema = schema.get() + self.sp_schema = schema + @classmethod def from_fields(cls, fields): cdef: @@ -223,3 +227,8 @@ cdef DataType box_data_type(const shared_ptr[CDataType]& type): cdef DataType out = DataType() out.init(type) return out + +cdef Schema box_schema(const shared_ptr[CSchema]& type): + cdef Schema out = Schema() + out.init_schema(type) + return out diff --git a/python/pyarrow/table.pxd b/python/pyarrow/table.pxd new file mode 100644 index 00000000000..0a5c122c95c --- /dev/null +++ b/python/pyarrow/table.pxd @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyarrow.includes.common cimport shared_ptr +from pyarrow.includes.libarrow cimport CChunkedArray, CColumn, CTable + + +cdef class ChunkedArray: + cdef: + shared_ptr[CChunkedArray] sp_chunked_array + CChunkedArray* chunked_array + + cdef init(self, const shared_ptr[CChunkedArray]& chunked_array) + cdef _check_nullptr(self) + + +cdef class Column: + cdef: + shared_ptr[CColumn] sp_column + CColumn* column + + cdef init(self, const shared_ptr[CColumn]& column) + cdef _check_nullptr(self) + + +cdef class Table: + cdef: + shared_ptr[CTable] sp_table + CTable* table + + cdef init(self, const shared_ptr[CTable]& table) + cdef _check_nullptr(self) diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx new file mode 100644 index 00000000000..2fb8ed5f0ae --- /dev/null +++ b/python/pyarrow/table.pyx @@ -0,0 +1,264 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True + +from pyarrow.includes.libarrow cimport * +cimport pyarrow.includes.pyarrow as pyarrow + +import pyarrow.config + +from pyarrow.array cimport Array, box_arrow_array +from pyarrow.compat import frombytes, tobytes +from pyarrow.error cimport check_status +from pyarrow.schema cimport box_data_type, box_schema + +cdef class ChunkedArray: + ''' + Do not call this class's constructor directly. + ''' + + def __cinit__(self): + self.chunked_array = NULL + + cdef init(self, const shared_ptr[CChunkedArray]& chunked_array): + self.sp_chunked_array = chunked_array + self.chunked_array = chunked_array.get() + + cdef _check_nullptr(self): + if self.chunked_array == NULL: + raise ReferenceError("ChunkedArray object references a NULL pointer." + "Not initialized.") + + def length(self): + self._check_nullptr() + return self.chunked_array.length() + + def __len__(self): + return self.length() + + property null_count: + + def __get__(self): + self._check_nullptr() + return self.chunked_array.null_count() + + property num_chunks: + + def __get__(self): + self._check_nullptr() + return self.chunked_array.num_chunks() + + def chunk(self, i): + self._check_nullptr() + return box_arrow_array(self.chunked_array.chunk(i)) + + + def iterchunks(self): + for i in range(self.length()): + yield self.chunk(i) + + +cdef class Column: + ''' + Do not call this class's constructor directly. + ''' + + def __cinit__(self): + self.column = NULL + + cdef init(self, const shared_ptr[CColumn]& column): + self.sp_column = column + self.column = column.get() + + def to_pandas(self): + """ + Convert the arrow::Column to a pandas Series + """ + cdef: + PyObject* arr + + import pandas as pd + + check_status(pyarrow.ArrowToPandas(self.sp_column, &arr)) + return pd.Series(arr, name=self.name) + + cdef _check_nullptr(self): + if self.column == NULL: + raise ReferenceError("Column object references a NULL pointer." 
+ "Not initialized.") + + def __len__(self): + self._check_nullptr() + return self.column.length() + + def length(self): + self._check_nullptr() + return self.column.length() + + property shape: + + def __get__(self): + self._check_nullptr() + return (self.length(),) + + property null_count: + + def __get__(self): + self._check_nullptr() + return self.column.null_count() + + property name: + + def __get__(self): + return frombytes(self.column.name()) + + property type: + + def __get__(self): + return box_data_type(self.column.type()) + + property data: + + def __get__(self): + cdef ChunkedArray chunked_array = ChunkedArray() + chunked_array.init(self.column.data()) + return chunked_array + + +cdef class Table: + ''' + Do not call this class's constructor directly. + ''' + + def __cinit__(self): + self.table = NULL + + cdef init(self, const shared_ptr[CTable]& table): + self.sp_table = table + self.table = table.get() + + cdef _check_nullptr(self): + if self.table == NULL: + raise ReferenceError("Table object references a NULL pointer." 
+ "Not initialized.") + + @staticmethod + def from_pandas(df, name=None): + pass + + @staticmethod + def from_arrays(names, arrays, name=None): + cdef: + Array arr + Table result + c_string c_name + vector[shared_ptr[CField]] fields + vector[shared_ptr[CColumn]] columns + shared_ptr[CSchema] schema + shared_ptr[CTable] table + + cdef int K = len(arrays) + + fields.resize(K) + columns.resize(K) + for i in range(K): + arr = arrays[i] + c_name = tobytes(names[i]) + + fields[i].reset(new CField(c_name, arr.type.sp_type, True)) + columns[i].reset(new CColumn(fields[i], arr.sp_array)) + + if name is None: + c_name = '' + else: + c_name = tobytes(name) + + schema.reset(new CSchema(fields)) + table.reset(new CTable(c_name, schema, columns)) + + result = Table() + result.init(table) + + return result + + def to_pandas(self): + """ + Convert the arrow::Table to a pandas DataFrame + """ + cdef: + PyObject* arr + shared_ptr[CColumn] col + + import pandas as pd + + names = [] + data = [] + for i in range(self.table.num_columns()): + col = self.table.column(i) + check_status(pyarrow.ArrowToPandas(col, &arr)) + names.append(frombytes(col.get().name())) + data.append( arr) + + return pd.DataFrame(dict(zip(names, data)), columns=names) + + property name: + + def __get__(self): + self._check_nullptr() + return frombytes(self.table.name()) + + property schema: + + def __get__(self): + raise box_schema(self.table.schema()) + + def column(self, index): + self._check_nullptr() + cdef Column column = Column() + column.init(self.table.column(index)) + return column + + def __getitem__(self, i): + return self.column(i) + + def itercolumns(self): + for i in range(self.num_columns): + yield self.column(i) + + property num_columns: + + def __get__(self): + self._check_nullptr() + return self.table.num_columns() + + property num_rows: + + def __get__(self): + self._check_nullptr() + return self.table.num_rows() + + def __len__(self): + return self.num_rows + + property shape: + + def 
__get__(self): + return (self.num_rows, self.num_columns) + diff --git a/python/pyarrow/tests/test_column.py b/python/pyarrow/tests/test_column.py index fa862550829..b62f58236e0 100644 --- a/python/pyarrow/tests/test_column.py +++ b/python/pyarrow/tests/test_column.py @@ -31,7 +31,7 @@ def test_basics(self): ] table = A.Table.from_arrays(('a'), data, 'table_name') column = table.column(0) - assert column.name() == 'a' + assert column.name == 'a' assert column.length() == 5 assert len(column) == 5 assert column.shape == (5,) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 797a3a75b54..83fcbb8faff 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -50,11 +50,16 @@ def test_basics(self): A.from_pylist([-10, -5, 0, 5, 10]) ] table = A.Table.from_arrays(('a', 'b'), data, 'table_name') - assert table.name() == 'table_name' - assert table.num_rows() == 5 - assert table.num_columns() == 2 + assert table.name == 'table_name' + assert len(table) == 5 + assert table.num_rows == 5 + assert table.num_columns == 2 assert table.shape == (5, 2) + for col in table.itercolumns(): + for chunk in col.data.iterchunks(): + assert chunk is not None + def test_pandas(self): data = [ A.from_pylist(range(5)), diff --git a/python/setup.py b/python/setup.py index 5cc871aba9f..ebd80de46b4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -214,7 +214,7 @@ def get_ext_built(self, name): return name + suffix def get_cmake_cython_names(self): - return ['array', 'config', 'error', 'scalar', 'schema'] + return ['array', 'config', 'error', 'scalar', 'schema', 'table'] def get_names(self): return self._found_names From b01b2010fb54fba1bb8e8560d44059ea97a7ca5a Mon Sep 17 00:00:00 2001 From: "Uwe L. 
Korn" Date: Thu, 31 Mar 2016 23:15:28 +0200 Subject: [PATCH 3/3] Use correct number of chunks --- python/pyarrow/table.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx index 2fb8ed5f0ae..4c4816f0c7e 100644 --- a/python/pyarrow/table.pyx +++ b/python/pyarrow/table.pyx @@ -71,7 +71,7 @@ cdef class ChunkedArray: def iterchunks(self): - for i in range(self.length()): + for i in range(self.num_chunks): yield self.chunk(i) @@ -125,7 +125,7 @@ cdef class Column: return self.column.null_count() property name: - + def __get__(self): return frombytes(self.column.name()) @@ -219,7 +219,7 @@ cdef class Table: return pd.DataFrame(dict(zip(names, data)), columns=names) property name: - + def __get__(self): self._check_nullptr() return frombytes(self.table.name()) @@ -234,7 +234,7 @@ cdef class Table: cdef Column column = Column() column.init(self.table.column(index)) return column - + def __getitem__(self, i): return self.column(i)