From 9857368647999b0ed69a03c824195a3ee85ae0e6 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 13 Jan 2022 10:55:59 -0800 Subject: [PATCH 1/4] Add character limit to Table.to_string() --- python/pyarrow/table.pxi | 12 +++++++----- python/pyarrow/tests/test_table.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index b12016c7d59..a5a69ed9476 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -2726,7 +2726,7 @@ cdef class Table(_PandasConvertible): raise TypeError("Do not call Table's constructor directly, use one of " "the `Table.from_*` functions instead.") - def to_string(self, *, show_metadata=False, preview_cols=0): + def to_string(self, *, show_metadata=False, preview_cols=0, cols_char_limit=120): """ Return human-readable string representation of Table. @@ -2736,6 +2736,8 @@ cdef class Table(_PandasConvertible): Display Field-level and Schema-level KeyValueMetadata. preview_cols : int, default 0 Display values of the columns for the first N columns. + cols_char_limit : int, default 200 + Max number of characters to show in each column preview Returns ------- @@ -2751,10 +2753,10 @@ cdef class Table(_PandasConvertible): if preview_cols: pieces.append('----') for i in range(min(self.num_columns, preview_cols)): - pieces.append('{}: {}'.format( - self.field(i).name, - self.column(i).to_string(indent=0, skip_new_lines=True) - )) + col_string = self.column(i).to_string(indent=0, skip_new_lines=True) + if len(col_string) > cols_char_limit: + col_string = col_string[:(cols_char_limit - 3)] + '...' 
+ pieces.append('{}: {}'.format(self.field(i).name, col_string)) if preview_cols < self.num_columns: pieces.append('...') return '\n'.join(pieces) diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index bdbd2a7c8fc..935aa57e427 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -1805,6 +1805,24 @@ def test_table_repr_to_string_ellipsis(): c1: [[10,20,30,40,10,...,40,10,20,30,40]]""" +def test_table_repr_cols_char_limit(): + # Schema passed explicitly + schema = pa.schema([pa.field('c0', pa.int16(), + metadata={'key': 'value'}), + pa.field('c1', pa.int32())], + metadata={b'foo': b'bar'}) + + tab = pa.table([pa.array([1, 2, 3, 4]*10, type='int16'), + pa.array([10, 20, 30, 40]*10, type='int32')], + schema=schema) + assert tab.to_string(preview_cols=10, cols_char_limit=65) == """pyarrow.Table +c0: int16 +c1: int32 +---- +c0: [[1,2,3,4,1,2,3,4,1,2,...,3,4,1,2,3,4,1,2,3,4]] +c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,40,10,20,30...""" + + def test_table_function_unicode_schema(): col_a = "äääh" col_b = "öööf" From e985f23fd0822abb0c8cdd7baf044b27459ca4b8 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 13 Jan 2022 12:48:58 -0800 Subject: [PATCH 2/4] Fix lint --- python/pyarrow/table.pxi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index a5a69ed9476..4257f4cd9fa 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -2753,7 +2753,8 @@ cdef class Table(_PandasConvertible): if preview_cols: pieces.append('----') for i in range(min(self.num_columns, preview_cols)): - col_string = self.column(i).to_string(indent=0, skip_new_lines=True) + col_string = self.column(i).to_string( + indent=0, skip_new_lines=True) if len(col_string) > cols_char_limit: col_string = col_string[:(cols_char_limit - 3)] + '...' 
pieces.append('{}: {}'.format(self.field(i).name, col_string)) From 68f3ea8cb1296a5829e05c36f294e322598c246e Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 18 Jan 2022 09:05:48 -0800 Subject: [PATCH 3/4] Try to close delimiters --- python/pyarrow/table.pxi | 22 ++++++++++++++++++++-- python/pyarrow/tests/test_table.py | 4 ++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 4257f4cd9fa..25a0d2f9a23 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -1408,6 +1408,24 @@ cdef _sanitize_arrays(arrays, names, schema, metadata, return converted_arrays +cdef _truncate_array_string(arr_str, length): + delim_stack = [] + partial_end = 0 + + for pos, char in enumerate(arr_str): + if char == "[": + delim_stack.append("]") + continue + elif char == "]": + assert delim_stack[-1] == "]" + delim_stack.pop() + elif char != ",": + continue + + if pos >= length: + return arr_str[:(pos + 1)] + "...]" * len(delim_stack) + + cdef class RecordBatch(_PandasConvertible): """ Batch of rows of columns of equal length @@ -2736,7 +2754,7 @@ cdef class Table(_PandasConvertible): Display Field-level and Schema-level KeyValueMetadata. preview_cols : int, default 0 Display values of the columns for the first N columns. - cols_char_limit : int, default 200 + cols_char_limit : int, default 120 Max number of characters to show in each column preview Returns @@ -2756,7 +2774,7 @@ cdef class Table(_PandasConvertible): col_string = self.column(i).to_string( indent=0, skip_new_lines=True) if len(col_string) > cols_char_limit: - col_string = col_string[:(cols_char_limit - 3)] + '...' 
+ col_string = _truncate_array_string(col_string, cols_char_limit) pieces.append('{}: {}'.format(self.field(i).name, col_string)) if preview_cols < self.num_columns: pieces.append('...') diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py index 935aa57e427..e977902ae68 100644 --- a/python/pyarrow/tests/test_table.py +++ b/python/pyarrow/tests/test_table.py @@ -1815,12 +1815,12 @@ def test_table_repr_cols_char_limit(): tab = pa.table([pa.array([1, 2, 3, 4]*10, type='int16'), pa.array([10, 20, 30, 40]*10, type='int32')], schema=schema) - assert tab.to_string(preview_cols=10, cols_char_limit=65) == """pyarrow.Table + assert tab.to_string(preview_cols=10, cols_char_limit=50) == """pyarrow.Table c0: int16 c1: int32 ---- c0: [[1,2,3,4,1,2,3,4,1,2,...,3,4,1,2,3,4,1,2,3,4]] -c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,40,10,20,30...""" +c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,...]...]""" def test_table_function_unicode_schema(): From c4368fae77c16d7ab30b9a4eeee2076dd906f3aa Mon Sep 17 00:00:00 2001 From: Will Jones Date: Tue, 18 Jan 2022 09:22:37 -0800 Subject: [PATCH 4/4] Fix style --- python/pyarrow/table.pxi | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 25a0d2f9a23..3b918a83942 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -1411,7 +1411,7 @@ cdef _sanitize_arrays(arrays, names, schema, metadata, cdef _truncate_array_string(arr_str, length): delim_stack = [] partial_end = 0 - + for pos, char in enumerate(arr_str): if char == "[": delim_stack.append("]") @@ -2774,7 +2774,8 @@ cdef class Table(_PandasConvertible): col_string = self.column(i).to_string( indent=0, skip_new_lines=True) if len(col_string) > cols_char_limit: - col_string = _truncate_array_string(col_string, cols_char_limit) + col_string = _truncate_array_string( + col_string, cols_char_limit) pieces.append('{}: {}'.format(self.field(i).name, 
col_string)) if preview_cols < self.num_columns: pieces.append('...')