diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst index a58b460fef409c..960fd84fe20641 100644 --- a/Doc/library/textwrap.rst +++ b/Doc/library/textwrap.rst @@ -282,6 +282,14 @@ hyphenated words; only then will long words be broken if necessary, unless .. versionadded:: 3.4 + .. attribute:: text_len + + (default: ``len``) Used to determine the length of a string. You can + provide a custom function, e.g. to account for wide characters. + + .. versionadded:: 3.11 + + .. index:: single: ...; placeholder .. attribute:: placeholder diff --git a/Lib/idlelib/idle_test/test_calltip.py b/Lib/idlelib/idle_test/test_calltip.py index 28c196a42672fc..c1fea6076972f6 100644 --- a/Lib/idlelib/idle_test/test_calltip.py +++ b/Lib/idlelib/idle_test/test_calltip.py @@ -105,7 +105,7 @@ def test_signature_wrap(self): (width=70, initial_indent='', subsequent_indent='', expand_tabs=True, replace_whitespace=True, fix_sentence_endings=False, break_long_words=True, drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None, - placeholder=' [...]') + placeholder=' [...]', text_len=<built-in function len>) Object for wrapping/filling text. The public interface consists of the wrap() and fill() methods; the other methods are just there for subclasses to override in order to tweak the default behaviour. 
class WideCharacterTestCase(BaseTestCase):
    """Wrapping and shortening with a text_len that widens some characters.

    Every test first pins the default behaviour (no text_len argument) and
    then the result obtained with the custom text_len defined below.
    """

    def text_len(self, text):
        """Return the width of text in columns.

        Characters whose East Asian Width is 'F' (Fullwidth) or 'W' (Wide)
        count as two columns; every other character counts as one.
        """
        return sum(
            2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
            for c in text
        )

    def check_shorten(self, text, width, expect, **kwargs):
        """Assert that shorten(text, width, **kwargs) yields expect."""
        result = shorten(text, width, **kwargs)
        self.check(result, expect)

    def test_wrap(self):
        text = "123 🔧"
        self.check_wrap(text, 5, ["123 🔧"])
        self.check_wrap(text, 5, ["123", "🔧"], text_len=self.text_len)

    def test_wrap_initial_indent(self):
        text = "12 12"
        self.check_wrap(text, 6, ["🔧12 12"], initial_indent="🔧")
        self.check_wrap(text, 6, ["🔧12", "12"], initial_indent="🔧",
                        text_len=self.text_len)

    def test_wrap_subsequent_indent(self):
        text = "12 12 12 12"
        self.check_wrap(text, 6, ["12 12", "🔧12 12"], subsequent_indent="🔧")
        self.check_wrap(text, 6, ["12 12", "🔧12", "🔧12"],
                        subsequent_indent="🔧", text_len=self.text_len)

    def test_shorten(self):
        text = "123 1234🔧"
        expected = "123 [...]"
        # Without text_len the text is returned unchanged at width 9.
        self.check_shorten(text, 9, text)
        # With text_len the last word no longer fits and is replaced by the
        # default placeholder.
        self.check_shorten(text, 9, expected, text_len=self.text_len)

    def test_shorten_placeholder(self):
        text = "123 1 123"
        self.check_shorten(text, 7, "123 1 🔧", placeholder=" 🔧")
        self.check_shorten(text, 7, "123 🔧", placeholder=" 🔧",
                           text_len=self.text_len)
"6789"], text_len=self.text_len) + + def test_char_longer_than_width(self): + text = "AA0123" + self.check_wrap(text, 3, ["A", "A", "012", "3"], text_len=self.text_len) + + def test_next_char_overflow(self): + text = "BB0123" + self.check_wrap(text, 3, ["B", "B0", "123"], text_len=self.text_len) + + if __name__ == '__main__': unittest.main() diff --git a/Lib/textwrap.py b/Lib/textwrap.py index 41366fbf443a4f..69e25e5cf6e804 100644 --- a/Lib/textwrap.py +++ b/Lib/textwrap.py @@ -122,7 +122,8 @@ def __init__(self, tabsize=8, *, max_lines=None, - placeholder=' [...]'): + placeholder=' [...]', + text_len=len): self.width = width self.initial_indent = initial_indent self.subsequent_indent = subsequent_indent @@ -135,6 +136,7 @@ def __init__(self, self.tabsize = tabsize self.max_lines = max_lines self.placeholder = placeholder + self.text_len = text_len # -- Private methods ----------------------------------------------- @@ -194,6 +196,25 @@ def _fix_sentence_endings(self, chunks): else: i += 1 + def _find_width_index(self, text, width): + """_find_length_index(text : string, width : int) + + Find at which index the text has the required width, since when using a + different text_len, this index will not be equal to the required width. + """ + # When using default len as self.text_len, the required index and width + # will be equal, this prevents calculation time. 
+ if self.text_len(text[:width]) == width: + # For character widths greater than one, width can be more than the + # number of characters + return min(width, len(text)) + cur_text = '' + for i, c in enumerate(text): + cur_text += c + cur_width = self.text_len(cur_text) + if cur_width > width: + return max(i, 1) + def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): """_handle_long_word(chunks : [string], cur_line : [string], @@ -214,10 +235,11 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): if self.break_long_words and space_left > 0: end = space_left chunk = reversed_chunks[-1] - if self.break_on_hyphens and len(chunk) > space_left: + end = self._find_width_index(chunk, space_left) + if self.break_on_hyphens and self.text_len(chunk) > space_left: # break after last hyphen, but only if there are # non-hyphens before it - hyphen = chunk.rfind('-', 0, space_left) + hyphen = chunk.rfind('-', 0, end) if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]): end = hyphen + 1 cur_line.append(chunk[:end]) @@ -256,7 +278,8 @@ def _wrap_chunks(self, chunks): indent = self.subsequent_indent else: indent = self.initial_indent - if len(indent) + len(self.placeholder.lstrip()) > self.width: + if (self.text_len(indent) + + self.text_len(self.placeholder.lstrip()) > self.width): raise ValueError("placeholder too large for max width") # Arrange in reverse order so items can be efficiently popped @@ -277,7 +300,7 @@ def _wrap_chunks(self, chunks): indent = self.initial_indent # Maximum width for this line. - width = self.width - len(indent) + width = self.width - self.text_len(indent) # First chunk on line is whitespace -- drop it, unless this # is the very beginning of the text (ie. no lines started yet). @@ -285,7 +308,7 @@ def _wrap_chunks(self, chunks): del chunks[-1] while chunks: - l = len(chunks[-1]) + l = self.text_len(chunks[-1]) # Can at least squeeze this chunk onto the current line. 
if cur_len + l <= width: @@ -298,13 +321,13 @@ def _wrap_chunks(self, chunks): # The current line is full, and the next chunk is too big to # fit on *any* line (not just this one). - if chunks and len(chunks[-1]) > width: + if chunks and self.text_len(chunks[-1]) > width: self._handle_long_word(chunks, cur_line, cur_len, width) - cur_len = sum(map(len, cur_line)) + cur_len = sum(map(self.text_len, cur_line)) # If the last chunk on this line is all whitespace, drop it. if self.drop_whitespace and cur_line and cur_line[-1].strip() == '': - cur_len -= len(cur_line[-1]) + cur_len -= self.text_len(cur_line[-1]) del cur_line[-1] if cur_line: @@ -320,16 +343,17 @@ def _wrap_chunks(self, chunks): else: while cur_line: if (cur_line[-1].strip() and - cur_len + len(self.placeholder) <= width): + cur_len + self.text_len(self.placeholder) <= width): cur_line.append(self.placeholder) lines.append(indent + ''.join(cur_line)) break - cur_len -= len(cur_line[-1]) + cur_len -= self.text_len(cur_line[-1]) del cur_line[-1] else: if lines: prev_line = lines[-1].rstrip() - if (len(prev_line) + len(self.placeholder) <= + if (self.text_len(prev_line) + + self.text_len(self.placeholder) <= self.width): lines[-1] = prev_line + self.placeholder break