@@ -975,6 +975,7 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
975975{
976976 BOOL res = TRUE;
977977 wchar_t * wbuf ;
978+ char * buf ;
978979 DWORD len , wlen , orig_len , n = 0 ;
979980 HANDLE handle ;
980981
@@ -1008,11 +1009,23 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls,
10081009 while (wlen > 32766 / sizeof (wchar_t )) {
10091010 len /= 2 ;
10101011 orig_len = len ;
1011- /* Reduce the length until we hit the final byte of a UTF-8 sequence
1012- * (top bit is unset). Fix for github issue 82052.
1013- */
1014- while (len > 0 && (((char * )b -> buf )[len - 1 ] & 0x80 ) != 0 )
1015- -- len ;
1012+ /* Reduce the length until we find a UTF-8 sequence boundary.
1013+ Fix for GitHub issues gh-110913 and gh-82052.
1014+ If the last byte is not a 1-byte character, enter the workaround.
1015+ */
1016+ buf = (char * )b -> buf ;
1017+ if (len > 0 && (buf [len - 1 ] & 0x80 ) != 0 ) {
1018+ while (len > 0 && (buf [len - 1 ] & 0xc0 ) == 0x80 ) {
1019+ /* Trace back all the UTF-8 continuation bytes */
1020+ -- len ;
1021+ }
1022+ if (len > 0 ) {
1023+ /* Consume one more byte. If the encoding is correct, this
1024+ byte is the head of the last (potentially incomplete) UTF-8
1025+ sequence, which also needs to go into the next chunk. */
1026+ -- len ;
1027+ }
1028+ }
10161029 /* If we hit a length of 0, something has gone wrong. This shouldn't
10171030 * be possible, as valid UTF-8 can have at most 3 non-final bytes
10181031 * before a final one, and our buffer is way longer than that.
0 commit comments