diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index b12016c7d59..3b918a83942 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -1408,6 +1408,24 @@ cdef _sanitize_arrays(arrays, names, schema, metadata, return converted_arrays +cdef _truncate_array_string(arr_str, length): + delim_stack = [] + partial_end = 0 + + for pos, char in enumerate(arr_str): + if char == "[": + delim_stack.append("]") + continue + elif char == "]": + assert delim_stack[-1] == "]" + delim_stack.pop() + elif char != ",": + continue + + if pos >= length: + return arr_str[:(pos + 1)] + "...]" * len(delim_stack) + + cdef class RecordBatch(_PandasConvertible): """ Batch of rows of columns of equal length @@ -2726,7 +2744,7 @@ cdef class Table(_PandasConvertible): raise TypeError("Do not call Table's constructor directly, use one of " "the `Table.from_*` functions instead.") - def to_string(self, *, show_metadata=False, preview_cols=0): + def to_string(self, *, show_metadata=False, preview_cols=0, cols_char_limit=120): """ Return human-readable string representation of Table. @@ -2736,6 +2754,8 @@ cdef class Table(_PandasConvertible): Display Field-level and Schema-level KeyValueMetadata. preview_cols : int, default 0 Display values of the columns for the first N columns. 
def test_table_repr_cols_char_limit():
    """Check ``Table.to_string`` output when ``cols_char_limit`` is set."""
    fields = [
        pa.field('c0', pa.int16(), metadata={'key': 'value'}),
        pa.field('c1', pa.int32()),
    ]
    table_schema = pa.schema(fields, metadata={b'foo': b'bar'})

    columns = [
        pa.array([1, 2, 3, 4]*10, type='int16'),
        pa.array([10, 20, 30, 40]*10, type='int32'),
    ]
    table = pa.table(columns, schema=table_schema)

    # With a limit of 50, the expected c0 preview is shown in full while
    # the expected c1 preview ends with the truncation markers.
    rendered = table.to_string(preview_cols=10, cols_char_limit=50)
    assert rendered == """pyarrow.Table
c0: int16
c1: int32
----
c0: [[1,2,3,4,1,2,3,4,1,2,...,3,4,1,2,3,4,1,2,3,4]]
c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,...]...]"""