From c21545bad8adb54302d5c6da13fe3a5e97963254 Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Sun, 11 Dec 2022 12:30:32 -0700 Subject: [PATCH 1/8] feat: Add use_wcwidth for Asian character support --- requirements.txt | 1 + table2ascii/options.py | 1 + table2ascii/table_to_ascii.py | 40 +++++++++++++++++++++++++++++------ tests/test_convert.py | 35 ++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 4037efa..7b90c5f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +wcwidth<1 typing-extensions>=3.7.4; python_version<'3.8' \ No newline at end of file diff --git a/table2ascii/options.py b/table2ascii/options.py index 998edbd..fb4258f 100644 --- a/table2ascii/options.py +++ b/table2ascii/options.py @@ -16,3 +16,4 @@ class Options: alignments: list[Alignment] | None cell_padding: int style: TableStyle + use_wcwidth: bool diff --git a/table2ascii/table_to_ascii.py b/table2ascii/table_to_ascii.py index 70359c8..68fc659 100644 --- a/table2ascii/table_to_ascii.py +++ b/table2ascii/table_to_ascii.py @@ -2,6 +2,8 @@ from math import ceil, floor +from wcwidth import wcswidth + from .alignment import Alignment from .annotations import SupportsStr from .options import Options @@ -35,6 +37,7 @@ def __init__( self.__first_col_heading = options.first_col_heading self.__last_col_heading = options.last_col_heading self.__cell_padding = options.cell_padding + self.__use_wcwidth = options.use_wcwidth # calculate number of columns self.__columns = self.__count_columns() @@ -86,7 +89,7 @@ def __auto_column_widths(self) -> list[int]: def widest_line(value: SupportsStr) -> int: """Returns the width of the longest line in a multi-line string""" text = str(value) - return max(len(line) for line in text.splitlines()) if len(text) else 0 + return max(self.__str_width(line) for line in text.splitlines()) if len(text) else 0 column_widths = [] # get the width necessary for each column @@ 
-140,17 +143,18 @@ def __pad(self, cell_value: SupportsStr, width: int, alignment: Alignment) -> st text = str(cell_value) padding = " " * self.__cell_padding padded_text = f"{padding}{text}{padding}" + text_width = self.__str_width(padded_text) if alignment == Alignment.LEFT: # pad with spaces on the end - return padded_text + (" " * (width - len(padded_text))) + return padded_text + (" " * (width - text_width)) if alignment == Alignment.CENTER: # pad with spaces, half on each side - before = " " * floor((width - len(padded_text)) / 2) - after = " " * ceil((width - len(padded_text)) / 2) + before = " " * floor((width - text_width) / 2) + after = " " * ceil((width - text_width) / 2) return before + padded_text + after if alignment == Alignment.RIGHT: # pad with spaces at the beginning - return (" " * (width - len(padded_text))) + padded_text + return (" " * (width - text_width)) + padded_text raise ValueError(f"The value '{alignment}' is not valid for alignment.") def __row_to_ascii( @@ -339,6 +343,23 @@ def __body_to_ascii(self, body: list[list[SupportsStr]]) -> str: for row in body ) + def __str_width(self, text: str) -> int: + """ + Returns the width of the string in characters for the purposes of monospace + formatting. This is usually the same as the length of the string, but can be + different for double-width characters (East Asian Wide and East Asian Fullwidth) + or zero-width characters (combining characters, zero-width space, etc.) 
+ + Args: + text: The text to measure + + Returns: + The width of the string in characters + """ + width = wcswidth(text) if self.__use_wcwidth else -1 + # if use_wcwidth is False or wcswidth fails, fall back to len + return width if width >= 0 else len(text) + def to_ascii(self) -> str: """Generates a formatted ASCII table @@ -375,6 +396,7 @@ def table2ascii( alignments: list[Alignment] | None = None, cell_padding: int = 1, style: TableStyle = PresetStyle.double_thin_compact, + use_wcwidth: bool = False, ) -> str: """Convert a 2D Python table to ASCII text @@ -391,7 +413,7 @@ def table2ascii( Defaults to :py:obj:`False`. column_widths: List of widths in characters for each column. Any value of :py:obj:`None` indicates that the column width should be determined automatically. If :py:obj:`None` - is passed instead of a :py:obj:`~typing.List`, all columns will be automatically sized. + is passed instead of a :class:`list`, all columns will be automatically sized. Defaults to :py:obj:`None`. alignments: List of alignments for each column (ex. ``[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]``). If not specified or set to @@ -401,6 +423,11 @@ def table2ascii( Defaults to ``1``. style: Table style to use for styling (preset styles can be imported). Defaults to :ref:`PresetStyle.double_thin_compact `. + use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of + :func:`len`. This is useful when dealing with double-width characters + (East Asian Wide and East Asian Fullwidth) or zero-width characters + (combining characters, zero-width space, etc.) which are not properly handled by :func:`len`. + Defaults to :py:obj:`False`. 
Returns: The generated ASCII table @@ -416,5 +443,6 @@ def table2ascii( alignments=alignments, cell_padding=cell_padding, style=style, + use_wcwidth=use_wcwidth, ), ).to_ascii() diff --git a/tests/test_convert.py b/tests/test_convert.py index 3e7a045..8967dd6 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -246,3 +246,38 @@ def test_multiline_cells(): "╚═══════════════════════════════════════════╝" ) assert text == expected + + +def test_east_asian_wide_characters_and_zero_width(): + # using len() to count the number of characters + text = t2a( + header=["日期", "test"], + body=[["2022/12/11", "test"], ["2022/1/1", "測試"]], + cell_padding=5, + ) + expected = ( + "╔═══════════════════════════════════╗\n" + "║ 日期 test ║\n" + "╟───────────────────────────────────╢\n" + "║ 2022/12/11 test ║\n" + "║ 2022/1/1 測試 ║\n" + "╚═══════════════════════════════════╝" + ) + assert text == expected + + # using wcwidth.wcswidth() to count the number of characters + text = t2a( + header=["日期", "test"], + body=[["2022/12/11", "test"], ["2022/1/1", "測試"]], + cell_padding=5, + use_wcwidth=True, + ) + expected = ( + "╔═══════════════════════════════════╗\n" + "║ 日期 test ║\n" + "╟───────────────────────────────────╢\n" + "║ 2022/12/11 test ║\n" + "║ 2022/1/1 測試 ║\n" + "╚═══════════════════════════════════╝" + ) + assert text == expected From 2d86c691dbf94e3b5c84d3e7514224cd2fb3d833 Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Sun, 11 Dec 2022 12:37:56 -0700 Subject: [PATCH 2/8] Ignore missing types in wcwidth --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1c5a9d5..ae84208 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,5 +77,6 @@ namespace_packages = true [[tool.mypy.overrides]] module = [ "setuptools.*", + "wcwidth" ] ignore_missing_imports = true From cab0f576bd059bda418fd6cdb2551599553dd63c Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Sun, 11 Dec 2022 14:47:47 -0700 Subject: [PATCH 3/8] 
feat!: Make wcwidth the default --- table2ascii/table_to_ascii.py | 10 +++++----- tests/test_convert.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/table2ascii/table_to_ascii.py b/table2ascii/table_to_ascii.py index 68fc659..b8c285c 100644 --- a/table2ascii/table_to_ascii.py +++ b/table2ascii/table_to_ascii.py @@ -396,7 +396,7 @@ def table2ascii( alignments: list[Alignment] | None = None, cell_padding: int = 1, style: TableStyle = PresetStyle.double_thin_compact, - use_wcwidth: bool = False, + use_wcwidth: bool = True, ) -> str: """Convert a 2D Python table to ASCII text @@ -424,10 +424,10 @@ def table2ascii( style: Table style to use for styling (preset styles can be imported). Defaults to :ref:`PresetStyle.double_thin_compact `. use_wcwidth: Whether to use :func:`wcwidth.wcswidth` to determine the width of each cell instead of - :func:`len`. This is useful when dealing with double-width characters - (East Asian Wide and East Asian Fullwidth) or zero-width characters - (combining characters, zero-width space, etc.) which are not properly handled by :func:`len`. - Defaults to :py:obj:`False`. + :func:`len`. The :func:`~wcwidth.wcswidth` function takes into account double-width characters + (East Asian Wide and East Asian Fullwidth) and zero-width characters (combining characters, + zero-width space, etc.), whereas :func:`len` determines the width solely based on the number of + characters in the string. Defaults to :py:obj:`True`. 
Returns: The generated ASCII table diff --git a/tests/test_convert.py b/tests/test_convert.py index 8967dd6..c3acc5f 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -254,6 +254,7 @@ def test_east_asian_wide_characters_and_zero_width(): header=["日期", "test"], body=[["2022/12/11", "test"], ["2022/1/1", "測試"]], cell_padding=5, + use_wcwidth=False, ) expected = ( "╔═══════════════════════════════════╗\n" From 8524363969004e6088b186706b39fda95970c73f Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Mon, 12 Dec 2022 02:30:18 -0700 Subject: [PATCH 4/8] docs: Add version directives --- table2ascii/options.py | 7 ++++++- table2ascii/table_to_ascii.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/table2ascii/options.py b/table2ascii/options.py index fb4258f..a0ae142 100644 --- a/table2ascii/options.py +++ b/table2ascii/options.py @@ -8,7 +8,12 @@ @dataclass class Options: - """Class for storing options that the user sets""" + """Class for storing options that the user sets + + .. versionchanged:: 1.0.0 + + Added ``use_wcwidth`` option + """ first_col_heading: bool last_col_heading: bool diff --git a/table2ascii/table_to_ascii.py b/table2ascii/table_to_ascii.py index b8c285c..29af070 100644 --- a/table2ascii/table_to_ascii.py +++ b/table2ascii/table_to_ascii.py @@ -400,6 +400,9 @@ def table2ascii( ) -> str: """Convert a 2D Python table to ASCII text + .. versionchanged:: 1.0.0 + Added the ``use_wcwidth`` parameter defaulting to :py:obj:`True`. + Args: header: List of column values in the table's header row. All values should be :class:`str` or support :class:`str` conversion. If not specified, the table will not have a header row. 
From 456843c0ee7d8c9dbd8a530d4cd94f6f0a9c63bd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 09:30:29 +0000 Subject: [PATCH 5/8] style: auto fixes from pre-commit hooks --- table2ascii/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table2ascii/options.py b/table2ascii/options.py index a0ae142..9f73a16 100644 --- a/table2ascii/options.py +++ b/table2ascii/options.py @@ -9,7 +9,7 @@ @dataclass class Options: """Class for storing options that the user sets - + .. versionchanged:: 1.0.0 Added ``use_wcwidth`` option From 2eebc1c97686466b1ac33a51dfef6eb89c8c5d15 Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Mon, 12 Dec 2022 08:31:56 -0700 Subject: [PATCH 6/8] Sort dependencies --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7b90c5f..b6ab4e4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -wcwidth<1 -typing-extensions>=3.7.4; python_version<'3.8' \ No newline at end of file +typing-extensions>=3.7.4; python_version<'3.8' +wcwidth<1 \ No newline at end of file From 55225d7182fa94817246d7123d314e6ce994bae4 Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Mon, 12 Dec 2022 08:52:24 -0700 Subject: [PATCH 7/8] ci: Include emoji and u200b in tests --- tests/test_convert.py | 59 ++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/tests/test_convert.py b/tests/test_convert.py index c3acc5f..87a2d91 100644 --- a/tests/test_convert.py +++ b/tests/test_convert.py @@ -248,37 +248,50 @@ def test_multiline_cells(): assert text == expected -def test_east_asian_wide_characters_and_zero_width(): - # using len() to count the number of characters +def test_east_asian_wide_characters_and_zero_width_wcwidth(): + # using wcwidth.wcswidth() to count the number of characters text = t2a( - header=["日期", "test"], - 
body=[["2022/12/11", "test"], ["2022/1/1", "測試"]], - cell_padding=5, - use_wcwidth=False, + header=["#\u200b", "🦁", "🦡", "🦅", "🐍"], + body=[["💻", "✅", "✅", "❌", "❌"]], + footer=["🥞", "日", "月", "火", "水"], + first_col_heading=True, + ) + text2 = t2a( + header=["#\u200b", "🦁", "🦡", "🦅", "🐍"], + body=[["💻", "✅", "✅", "❌", "❌"]], + footer=["🥞", "日", "月", "火", "水"], + first_col_heading=True, + use_wcwidth=True, ) expected = ( - "╔═══════════════════════════════════╗\n" - "║ 日期 test ║\n" - "╟───────────────────────────────────╢\n" - "║ 2022/12/11 test ║\n" - "║ 2022/1/1 測試 ║\n" - "╚═══════════════════════════════════╝" + "╔════╦═══════════════════╗\n" + "║ #​ ║ 🦁 🦡 🦅 🐍 ║\n" + "╟────╫───────────────────╢\n" + "║ 💻 ║ ✅ ✅ ❌ ❌ ║\n" + "╟────╫───────────────────╢\n" + "║ 🥞 ║ 日 月 火 水 ║\n" + "╚════╩═══════════════════╝" ) assert text == expected + assert text2 == expected - # using wcwidth.wcswidth() to count the number of characters + +def test_east_asian_wide_characters_and_zero_width_no_wcwidth(): + # using len() to count the number of characters text = t2a( - header=["日期", "test"], - body=[["2022/12/11", "test"], ["2022/1/1", "測試"]], - cell_padding=5, - use_wcwidth=True, + header=["#\u200b", "🦁", "🦡", "🦅", "🐍"], + body=[["💻", "✅", "✅", "❌", "❌"]], + footer=["🥞", "日", "月", "火", "水"], + first_col_heading=True, + use_wcwidth=False, ) expected = ( - "╔═══════════════════════════════════╗\n" - "║ 日期 test ║\n" - "╟───────────────────────────────────╢\n" - "║ 2022/12/11 test ║\n" - "║ 2022/1/1 測試 ║\n" - "╚═══════════════════════════════════╝" + "╔════╦═══════════════╗\n" + "║ #​ ║ 🦁 🦡 🦅 🐍 ║\n" + "╟────╫───────────────╢\n" + "║ 💻 ║ ✅ ✅ ❌ ❌ ║\n" + "╟────╫───────────────╢\n" + "║ 🥞 ║ 日 月 火 水 ║\n" + "╚════╩═══════════════╝" ) assert text == expected From 3cac994269dd91e3df17c3998774452c2589c0d6 Mon Sep 17 00:00:00 2001 From: Jonah Lawrence Date: Mon, 12 Dec 2022 12:26:25 -0700 Subject: [PATCH 8/8] docs: Update readme options table --- README.md | 11 +++++++---- 1 file changed, 7 
insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d21fb58..0852081 100644 --- a/README.md +++ b/README.md @@ -165,15 +165,18 @@ All parameters are optional. | Option | Type | Default | Description | | :-----------------: | :-------------------: | :-------------------: | :-------------------------------------------------------------------------------: | -| `header` | `List[Any]` | `None` | First table row seperated by header row separator. Values should support `str()`. | -| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()`. | -| `footer` | `List[Any]` | `None` | Last table row seperated by header row separator. Values should support `str()`. | +| `header` | `List[Any]` | `None` | First table row separated by header row separator. Values should support `str()` | +| `body` | `List[List[Any]]` | `None` | List of rows for the main section of the table. Values should support `str()` | +| `footer` | `List[Any]` | `None` | Last table row separated by header row separator. Values should support `str()` | | `column_widths` | `List[Optional[int]]` | `None` (automatic) | List of column widths in characters for each column | | `alignments` | `List[Alignment]` | `None` (all centered) | Column alignments
(ex. `[Alignment.LEFT, Alignment.CENTER, Alignment.RIGHT]`) | | `style` | `TableStyle` | `double_thin_compact` | Table style to use for the table\* | | `first_col_heading` | `bool` | `False` | Whether to add a heading column separator after the first column | | `last_col_heading` | `bool` | `False` | Whether to add a heading column separator before the last column | -| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border. | +| `cell_padding` | `int` | `1` | The minimum number of spaces to add between the cell content and the cell border | +| `use_wcwidth` | `bool` | `True` | Whether to use [wcwidth][wcwidth] instead of `len()` to calculate cell width | + +[wcwidth]: https://pypi.org/project/wcwidth/ \*See a list of all preset styles [here](https://table2ascii.readthedocs.io/en/latest/styles.html).