From a6b7fd8799efd59c7a5cebed74e5e2a307eacd3c Mon Sep 17 00:00:00 2001 From: sakkyoi <22865542+sakkyoi@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:08:38 +0200 Subject: [PATCH 01/10] fix full-width characters issue (https://github.com/magmax/python-inquirer/issues/432) --- readchar/_win_read.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readchar/_win_read.py b/readchar/_win_read.py index c3c51c7..d91db04 100644 --- a/readchar/_win_read.py +++ b/readchar/_win_read.py @@ -7,8 +7,8 @@ def readchar() -> str: """Reads a single character from the input stream. Blocks until a character is available.""" - # manual byte decoding because some bytes in windows are not utf-8 encodable. - return chr(int.from_bytes(msvcrt.getch(), "big")) + # read a single wide character from the input. + return msvcrt.getwch() def readkey() -> str: @@ -21,7 +21,7 @@ def readkey() -> str: raise KeyboardInterrupt # if it is a normal character: - if ch not in "\x00\xe0": + if ch not in "\x00\xc3\xa0": return ch # if it is a scpeal key, read second half: From 8e48bfae76bb50457060b9205b146cf5cf9b4204 Mon Sep 17 00:00:00 2001 From: sakkyoi <22865542+sakkyoi@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:08:38 +0200 Subject: [PATCH 02/10] Fix the issue that raises a bunch of OSError exceptions in the test script (https://github.com/magmax/python-readchar/issues/93) --- tests/windows/conftest.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/windows/conftest.py b/tests/windows/conftest.py index 39817fc..91f0556 100644 --- a/tests/windows/conftest.py +++ b/tests/windows/conftest.py @@ -3,8 +3,8 @@ import pytest -if sys.platform in ("win32", "cygwin"): - import msvcrt +# if sys.platform in ("win32", "cygwin"): +# import msvcrt # ignore all tests in this folder if not on windows @@ -14,10 +14,16 @@ def pytest_ignore_collect(path, config): @pytest.fixture -def patched_stdin(): +def patched_stdin(monkeypatch): class mocked_stdin: def push(self, string): - for c in string: - msvcrt.ungetch(ord(c).to_bytes(1, "big")) + # Create an iterator from the string + characters = iter(string) + + # Patch msvcrt.getwch to return the next character from the iterator. + # Don't use next(iter(string)) as it creates a new iterator each time. + # For example, + # if you use a new iterator, cursor_up (\x00\x48) will be read as \x00\x00. + monkeypatch.setattr("msvcrt.getwch", lambda: next(characters)) return mocked_stdin() From a18e428e3a33e07ab2973a67535f4f98ea0fead4 Mon Sep 17 00:00:00 2001 From: Sakkyoi Cheng <22865542+sakkyoi@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:08:38 +0200 Subject: [PATCH 03/10] Update readchar/_win_read.py Co-authored-by: Jan Wille --- readchar/_win_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readchar/_win_read.py b/readchar/_win_read.py index d91db04..d742170 100644 --- a/readchar/_win_read.py +++ b/readchar/_win_read.py @@ -4,7 +4,7 @@ def readchar() -> str: - """Reads a single character from the input stream. + """Reads a single utf8-character from the input stream. Blocks until a character is available.""" # read a single wide character from the input. From d985142e5c8747a8be841b344b80fb66b92c9c69 Mon Sep 17 00:00:00 2001 From: Sakkyoi Cheng <22865542+sakkyoi@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:08:38 +0200 Subject: [PATCH 04/10] Update tests/windows/conftest.py Co-authored-by: Jan Wille --- tests/windows/conftest.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/windows/conftest.py b/tests/windows/conftest.py index 91f0556..e8ffc5e 100644 --- a/tests/windows/conftest.py +++ b/tests/windows/conftest.py @@ -21,9 +21,6 @@ def push(self, string): characters = iter(string) # Patch msvcrt.getwch to return the next character from the iterator. - # Don't use next(iter(string)) as it creates a new iterator each time. - # For example, - # if you use a new iterator, cursor_up (\x00\x48) will be read as \x00\x00. monkeypatch.setattr("msvcrt.getwch", lambda: next(characters)) return mocked_stdin() From 258f9a24ce8417585e7d8a01cefd8c51d5d40066 Mon Sep 17 00:00:00 2001 From: Sakkyoi Cheng <22865542+sakkyoi@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:08:39 +0200 Subject: [PATCH 05/10] Update tests/windows/conftest.py Co-authored-by: Jan Wille --- tests/windows/conftest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/windows/conftest.py b/tests/windows/conftest.py index e8ffc5e..0fc7f96 100644 --- a/tests/windows/conftest.py +++ b/tests/windows/conftest.py @@ -3,8 +3,6 @@ import pytest -# if sys.platform in ("win32", "cygwin"): -# import msvcrt # ignore all tests in this folder if not on windows From b71430ab4ef6c134882574ffc20f4d16b9fc84a8 Mon Sep 17 00:00:00 2001 From: Jan Wille Date: Sun, 11 Aug 2024 21:08:39 +0200 Subject: [PATCH 06/10] fix: windows sequence start chars are `\x00\xe0` --- readchar/_win_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readchar/_win_read.py b/readchar/_win_read.py index d742170..9dddbb0 100644 --- a/readchar/_win_read.py +++ b/readchar/_win_read.py @@ -21,7 +21,7 @@ def readkey() -> str: raise KeyboardInterrupt # if it is a normal character: - if ch not in "\x00\xc3\xa0": + if ch not in "\x00\xe0": return ch # if it is a scpeal key, read second half: From ddcac7e084144517e19b070741cdc64c9c05789f Mon Sep 17 00:00:00 2001 From: Jan Wille Date: Sun, 11 Aug 2024 21:08:39 +0200 Subject: [PATCH 07/10] fix pre-commit --- tests/windows/conftest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/windows/conftest.py b/tests/windows/conftest.py index 0fc7f96..492d7d7 100644 --- a/tests/windows/conftest.py +++ b/tests/windows/conftest.py @@ -3,8 +3,6 @@ import pytest - - # ignore all tests in this folder if not on windows def pytest_ignore_collect(path, config): if sys.platform not in ("win32", "cygwin"): From 54ee1081784b7d76b0cd5fad4a385f0df6fec0f2 Mon Sep 17 00:00:00 2001 From: sakkyoi <22865542+sakkyoi@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:08:39 +0200 Subject: [PATCH 08/10] Fix UnicodeEncodeError when inputting emojis --- readchar/_win_read.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/readchar/_win_read.py b/readchar/_win_read.py index 9dddbb0..074ae51 100644 --- a/readchar/_win_read.py +++ b/readchar/_win_read.py @@ -6,9 +6,16 @@ def readchar() -> str: """Reads a single utf8-character from the input stream. Blocks until a character is available.""" + # read the first character. + ch = [msvcrt.getwch()] - # read a single wide character from the input. - return msvcrt.getwch() + # if the first character indicates a surrogate pair, read the second character. + if 0xD800 <= ord(ch[0]) <= 0xDFFF: + ch.append(msvcrt.getwch()) + + # combine the characters into a single utf-16 encoded string. + # this prevents the character from being treated as a surrogate pair again. + return "".join(ch).encode("utf-16", errors="surrogatepass").decode("utf-16") def readkey() -> str: From e64ded3169108c08c299a65d9d947255711572ab Mon Sep 17 00:00:00 2001 From: Jan Wille Date: Sun, 11 Aug 2024 21:08:40 +0200 Subject: [PATCH 09/10] move surrogate functionality into `readkey` --- readchar/_win_read.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/readchar/_win_read.py b/readchar/_win_read.py index 074ae51..f58464d 100644 --- a/readchar/_win_read.py +++ b/readchar/_win_read.py @@ -6,32 +6,36 @@ def readchar() -> str: """Reads a single utf8-character from the input stream. Blocks until a character is available.""" - # read the first character. - ch = [msvcrt.getwch()] - # if the first character indicates a surrogate pair, read the second character. - if 0xD800 <= ord(ch[0]) <= 0xDFFF: - ch.append(msvcrt.getwch()) - - # combine the characters into a single utf-16 encoded string. - # this prevents the character from being treated as a surrogate pair again. - return "".join(ch).encode("utf-16", errors="surrogatepass").decode("utf-16") + # read a single wide character from the input + return msvcrt.getwch() def readkey() -> str: """Reads the next keypress. If an escaped key is pressed, the full sequence is read and returned as noted in `_win_key.py`.""" + # read first character ch = readchar() + # keys like CTRL+C should cause a interrupt if ch in config.INTERRUPT_KEYS: raise KeyboardInterrupt - # if it is a normal character: - if ch not in "\x00\xe0": - return ch + # parse special multi character keys (see key module) + # https://learn.microsoft.com/cpp/c-runtime-library/reference/getch-getwch#remarks + if ch in "\x00\xe0": + # read the second half + # we always return the 0x00 prefix, this avoids duplications in the key module + ch = "\x00" + readchar() + + # parse unicode surrogates + # https://docs.python.org/3/c-api/unicode.html#c.Py_UNICODE_IS_SURROGATE + if "\uD800" <= ch <= "\uDFFF": + ch += readchar() - # if it is a scpeal key, read second half: - ch2 = readchar() + # combine the characters into a single utf-16 encoded string. + # this prevents the character from being treated as a surrogate pair again. + ch = ch.encode("utf-16", errors="surrogatepass").decode("utf-16") - return "\x00" + ch2 + return ch From 050dd5bc0b7b3bb20cb67966e89cf86648619c8a Mon Sep 17 00:00:00 2001 From: Jan Wille Date: Sun, 11 Aug 2024 21:35:38 +0200 Subject: [PATCH 10/10] add tests for new unicode support --- tests/windows/test_readchar.py | 15 +++++++++++++++ tests/windows/test_readkey.py | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tests/windows/test_readchar.py b/tests/windows/test_readchar.py index a6fa882..b8a8303 100644 --- a/tests/windows/test_readchar.py +++ b/tests/windows/test_readchar.py @@ -62,3 +62,18 @@ def test_controlCharacters(seq, key, patched_stdin): def test_CTRL_Characters(seq, key, patched_stdin): patched_stdin.push(seq) assert key == readchar() + + +@pytest.mark.parametrize( + ["seq", "key"], + [ + ("\xe4", "ä"), + ("\xe1", "á"), + ("\xe5", "å"), + ("\xdf", "ß"), + ("\u304c", "が"), + ], +) +def test_Unicode_Characters(seq, key, patched_stdin): + patched_stdin.push(seq) + assert key == readchar() diff --git a/tests/windows/test_readkey.py b/tests/windows/test_readkey.py index 8cfe53f..f2baafd 100644 --- a/tests/windows/test_readkey.py +++ b/tests/windows/test_readkey.py @@ -65,3 +65,22 @@ def test_navigationKeys(seq, key, patched_stdin): def test_functionKeys(seq, key, patched_stdin): patched_stdin.push(seq) assert key == readkey() + + +@pytest.mark.parametrize( + ["seq", "key"], + [ + ("\ud83d\ude00", "😀"), + ("\ud83d\ude18", "😘"), + ("\ud83d\ude09", "😉"), + ("\ud83d\udc4d", "👍"), + ("\ud83d\udc35", "🐵"), + ("\ud83c\udf47", "🍇"), + ("\ud83c\udf83", "🎃"), + ("\ud83d\udc53", "👓"), + ("\ud83c\udfc1", "🏁"), + ], +) +def test_UnicodeSurrogates(seq, key, patched_stdin): + patched_stdin.push(seq) + assert key == readkey()