From 6d74ba50841cdec18869b52442e1119333505cab Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 18 Jun 2019 17:51:35 +0200 Subject: [PATCH 1/3] bpo-37330: open() no longer accept 'U' in file mode open(), io.open(), codecs.open() and fileinput.FileInput no longer accept "U" ("universal newline") in the file mode. This flag was deprecated since Python 3.3. test_fileinput: add tests for 'rU' and 'U' modes Run make regen-all. --- Doc/library/codecs.rst | 3 +++ Doc/library/fileinput.rst | 9 +++---- Doc/library/functions.rst | 14 +++-------- Doc/whatsnew/3.9.rst | 8 ++++++ Lib/_pyio.py | 14 +---------- Lib/fileinput.py | 11 +++----- Lib/imp.py | 2 +- Lib/test/test_codecs.py | 5 ++-- Lib/test/test_fileinput.py | 22 ++++------------ Lib/test/test_io.py | 16 +++++------- .../2019-06-18-17-53-06.bpo-37330.wAvHmz.rst | 3 +++ Modules/_io/_iomodule.c | 25 ++----------------- Modules/_io/clinic/_iomodule.c.h | 7 +----- 13 files changed, 43 insertions(+), 96 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-06-18-17-53-06.bpo-37330.wAvHmz.rst diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index f071057293eece..ec6a0533033bd1 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -197,6 +197,9 @@ wider range of codecs when working with binary files: *buffering* has the same meaning as for the built-in :func:`open` function. It defaults to -1 which means that the default buffer size will be used. + .. versionchanged:: 3.9 + The ``'U'`` mode has been removed. + .. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict') diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst index f5e5280a136399..8870c177fb1e1c 100644 --- a/Doc/library/fileinput.rst +++ b/Doc/library/fileinput.rst @@ -148,8 +148,8 @@ available for subclassing as well: The sequence must be accessed in strictly sequential order; random access and :meth:`~io.TextIOBase.readline` cannot be mixed. 
- With *mode* you can specify which file mode will be passed to :func:`open`. It - must be one of ``'r'``, ``'rU'``, ``'U'`` and ``'rb'``. + With *mode* you can specify which file mode will be passed to :func:`open`. + It must be ``'r'`` or ``'rb'``. The *openhook*, when given, must be a function that takes two arguments, *filename* and *mode*, and returns an accordingly opened file-like object. You @@ -166,15 +166,14 @@ available for subclassing as well: .. versionchanged:: 3.2 Can be used as a context manager. - .. deprecated:: 3.4 - The ``'rU'`` and ``'U'`` modes. - .. deprecated:: 3.8 Support for :meth:`__getitem__` method is deprecated. .. versionchanged:: 3.8 The keyword parameter *mode* and *openhook* are now keyword-only. + .. versionchanged:: 3.9 + The ``'rU'`` and ``'U'`` modes have been removed. **Optional in-place filtering:** if the keyword argument ``inplace=True`` is diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 28d9c7b99e6f52..acf922dcd13786 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1085,12 +1085,6 @@ are always available. They are listed here in alphabetical order. first decoded using a platform-dependent encoding or using the specified *encoding* if given. - There is an additional mode character permitted, ``'U'``, which no longer - has any effect, and is considered deprecated. It previously enabled - :term:`universal newlines` in text mode, which became the default behaviour - in Python 3.0. Refer to the documentation of the - :ref:`newline <open-newline-parameter>` parameter for further details. - .. note:: Python doesn't depend on the underlying operating system's notion of text @@ -1247,10 +1241,6 @@ are always available. They are listed here in alphabetical order. * The file is now non-inheritable. - .. deprecated-removed:: 3.4 4.0 - - The ``'U'`` mode. - .. versionchanged:: 3.5 @@ -1266,6 +1256,10 @@ are always available. They are listed here in alphabetical order. 
* On Windows, opening a console buffer may return a subclass of :class:`io.RawIOBase` other than :class:`io.FileIO`. + .. versionchanged:: 3.9 + The ``'U'`` mode has been removed. + + .. function:: ord(c) Given a string representing one Unicode character, return an integer diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index b95b0999cb9524..4d917307fe90dc 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -277,6 +277,14 @@ that may require changes to your code. Changes in the Python API ------------------------- +* :func:`open`, :func:`io.open`, :func:`codecs.open` and + :class:`fileinput.FileInput` no longer accept ``'U'`` ("universal newline") + in the file mode. This flag was deprecated since Python 3.3. In Python 3, the + "universal newline" is used by default when a file is open in text mode. The + :ref:`newline parameter <open-newline-parameter>` of :func:`open` controls + how universal newlines work. + (Contributed by Victor Stinner in :issue:`37330`.) + * :func:`__import__` and :func:`importlib.util.resolve_name` now raise :exc:`ImportError` where it previously raised :exc:`ValueError`. Callers catching the specific exception type and supporting both Python 3.9 and diff --git a/Lib/_pyio.py b/Lib/_pyio.py index c1bdac7913193e..e819b0a3cce856 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -71,7 +71,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, 'b' binary mode 't' text mode (default) '+' open a disk file for updating (reading and writing) - 'U' universal newline mode (deprecated) ========= =============================================================== The default mode is 'rt' (open for reading text). For binary random @@ -87,10 +86,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, returned as strings, the bytes having been first decoded using a platform-dependent encoding or using the specified encoding if given. - 'U' mode is deprecated and will raise an exception in future versions - of Python. 
It has no effect in Python 3. Use newline to control - universal newlines mode. - buffering is an optional integer used to set the buffering policy. Pass 0 to switch buffering off (only allowed in binary mode), 1 to select line buffering (only usable in text mode), and an integer > 1 to indicate @@ -176,7 +171,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, if errors is not None and not isinstance(errors, str): raise TypeError("invalid errors: %r" % errors) modes = set(mode) - if modes - set("axrwb+tU") or len(mode) > len(modes): + if modes - set("axrwb+t") or len(mode) > len(modes): raise ValueError("invalid mode: %r" % mode) creating = "x" in modes reading = "r" in modes @@ -185,13 +180,6 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, updating = "+" in modes text = "t" in modes binary = "b" in modes - if "U" in modes: - if creating or writing or appending or updating: - raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'") - import warnings - warnings.warn("'U' mode is deprecated", - DeprecationWarning, 2) - reading = True if text and binary: raise ValueError("can't have text and binary mode at once") if creating + reading + writing + appending > 1: diff --git a/Lib/fileinput.py b/Lib/fileinput.py index c1b0ec9a8ed084..166c631689747d 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -209,15 +209,10 @@ def __init__(self, files=None, inplace=False, backup="", *, self._isstdin = False self._backupfilename = None # restrict mode argument to reading modes - if mode not in ('r', 'rU', 'U', 'rb'): - raise ValueError("FileInput opening mode must be one of " - "'r', 'rU', 'U' and 'rb'") - if 'U' in mode: - import warnings - warnings.warn("'U' mode is deprecated", - DeprecationWarning, 2) + if mode not in ('r', 'rb'): + raise ValueError("FileInput opening mode must be 'r' or 'rb'") self._mode = mode - self._write_mode = mode.replace('r', 'w') if 'U' not in mode else 'w' + self._write_mode = 
mode.replace('r', 'w') if openhook: if inplace: raise ValueError("FileInput cannot use an opening hook in inplace mode") diff --git a/Lib/imp.py b/Lib/imp.py index 31f8c766381adc..a6f6fc84349026 100644 --- a/Lib/imp.py +++ b/Lib/imp.py @@ -225,7 +225,7 @@ def load_module(name, file, filename, details): """ suffix, mode, type_ = details - if mode and (not mode.startswith(('r', 'U')) or '+' in mode): + if mode and (not mode.startswith('r') or '+' in mode): raise ValueError('invalid file open mode {!r}'.format(mode)) elif file is None and type_ in {PY_SOURCE, PY_COMPILED}: msg = 'file object required for import (type code {})'.format(type_) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index b37525bf660430..d98c24dc74eb14 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -712,9 +712,8 @@ def test_bug691291(self): self.addCleanup(support.unlink, support.TESTFN) with open(support.TESTFN, 'wb') as fp: fp.write(s) - with support.check_warnings(('', DeprecationWarning)): - reader = codecs.open(support.TESTFN, 'U', encoding=self.encoding) - with reader: + with codecs.open(support.TESTFN, 'r', + encoding=self.encoding) as reader: self.assertEqual(reader.read(), s1) class UTF16LETest(ReadTest, unittest.TestCase): diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index 014f19e6cbdb1a..819557d5e86860 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -226,19 +226,11 @@ def test_fileno(self): self.assertEqual(fi.fileno(), -1) def test_opening_mode(self): - try: - # invalid mode, should raise ValueError - fi = FileInput(mode="w") - self.fail("FileInput should reject invalid mode argument") - except ValueError: - pass - # try opening in universal newline mode - t1 = self.writeTmp(b"A\nB\r\nC\rD", mode="wb") - with check_warnings(('', DeprecationWarning)): - fi = FileInput(files=t1, mode="U") - with check_warnings(('', DeprecationWarning)): - lines = list(fi) - self.assertEqual(lines, ["A\n", "B\n", 
"C\n", "D"]) + # invalid modes + for mode in ('w', 'rU', 'U'): + with self.subTest(mode=mode): + with self.assertRaises(ValueError): + FileInput(mode=mode) def test_stdin_binary_mode(self): with mock.patch('sys.stdin') as m_stdin: @@ -985,10 +977,6 @@ def check(mode, expected_lines): self.assertEqual(lines, expected_lines) check('r', ['A\n', 'B\n', 'C\n', 'D\u20ac']) - with self.assertWarns(DeprecationWarning): - check('rU', ['A\n', 'B\n', 'C\n', 'D\u20ac']) - with self.assertWarns(DeprecationWarning): - check('U', ['A\n', 'B\n', 'C\n', 'D\u20ac']) with self.assertRaises(ValueError): check('rb', ['A\n', 'B\r\n', 'C\r', 'D\u20ac']) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 1fe1cba5167fc6..b967a47aea30b3 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3886,16 +3886,6 @@ def test_attributes(self): self.assertEqual(f.mode, "wb") f.close() - with support.check_warnings(('', DeprecationWarning)): - f = self.open(support.TESTFN, "U") - self.assertEqual(f.name, support.TESTFN) - self.assertEqual(f.buffer.name, support.TESTFN) - self.assertEqual(f.buffer.raw.name, support.TESTFN) - self.assertEqual(f.mode, "U") - self.assertEqual(f.buffer.mode, "rb") - self.assertEqual(f.buffer.raw.mode, "rb") - f.close() - f = self.open(support.TESTFN, "w+") self.assertEqual(f.mode, "w+") self.assertEqual(f.buffer.mode, "rb+") # Does it really matter? 
@@ -3909,6 +3899,12 @@ def test_attributes(self): f.close() g.close() + def test_removed_u_mode(self): + with self.assertRaises(ValueError) as cm: + # "U" mode has been removed in Python 3.9 + self.open(support.TESTFN, "U") + self.assertIn('invalid mode', str(cm.exception)) + def test_io_after_close(self): for kwargs in [ {"mode": "w"}, diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-06-18-17-53-06.bpo-37330.wAvHmz.rst b/Misc/NEWS.d/next/Core and Builtins/2019-06-18-17-53-06.bpo-37330.wAvHmz.rst new file mode 100644 index 00000000000000..aec6d018600a58 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-06-18-17-53-06.bpo-37330.wAvHmz.rst @@ -0,0 +1,3 @@ +:func:`open`, :func:`io.open`, :func:`codecs.open` and +:class:`fileinput.FileInput` no longer accept ``'U'`` ("universal newline") in +the file mode. This flag was deprecated since Python 3.3. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index b4cc894004027b..5932363f3af359 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -138,7 +138,6 @@ Character Meaning 'b' binary mode 't' text mode (default) '+' open a disk file for updating (reading and writing) -'U' universal newline mode (deprecated) ========= =============================================================== The default mode is 'rt' (open for reading text). For binary random @@ -154,10 +153,6 @@ bytes objects without any decoding. In text mode (the default, or when returned as strings, the bytes having been first decoded using a platform-dependent encoding or using the specified encoding if given. -'U' mode is deprecated and will raise an exception in future versions -of Python. It has no effect in Python 3. Use newline to control -universal newlines mode. - buffering is an optional integer used to set the buffering policy. 
Pass 0 to switch buffering off (only allowed in binary mode), 1 to select line buffering (only usable in text mode), and an integer > 1 to indicate @@ -233,12 +228,12 @@ static PyObject * _io_open_impl(PyObject *module, PyObject *file, const char *mode, int buffering, const char *encoding, const char *errors, const char *newline, int closefd, PyObject *opener) -/*[clinic end generated code: output=aefafc4ce2b46dc0 input=7295902222e6b311]*/ +/*[clinic end generated code: output=aefafc4ce2b46dc0 input=1543f4511d2356a5]*/ { unsigned i; int creating = 0, reading = 0, writing = 0, appending = 0, updating = 0; - int text = 0, binary = 0, universal = 0; + int text = 0, binary = 0; char rawmode[6], *m; int line_buffering, is_number; @@ -296,10 +291,6 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, case 'b': binary = 1; break; - case 'U': - universal = 1; - reading = 1; - break; default: goto invalid_mode; } @@ -322,18 +313,6 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, *m = '\0'; /* Parameters validation */ - if (universal) { - if (creating || writing || appending || updating) { - PyErr_SetString(PyExc_ValueError, - "mode U cannot be combined with 'x', 'w', 'a', or '+'"); - goto error; - } - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "'U' mode is deprecated", 1) < 0) - goto error; - reading = 1; - } - if (text && binary) { PyErr_SetString(PyExc_ValueError, "can't have text and binary mode at once"); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 1a9651d340813f..c1f518ff60273a 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -36,7 +36,6 @@ PyDoc_STRVAR(_io_open__doc__, "\'b\' binary mode\n" "\'t\' text mode (default)\n" "\'+\' open a disk file for updating (reading and writing)\n" -"\'U\' universal newline mode (deprecated)\n" "========= ===============================================================\n" "\n" "The default mode is \'rt\' (open for 
reading text). For binary random\n" @@ -52,10 +51,6 @@ PyDoc_STRVAR(_io_open__doc__, "returned as strings, the bytes having been first decoded using a\n" "platform-dependent encoding or using the specified encoding if given.\n" "\n" -"\'U\' mode is deprecated and will raise an exception in future versions\n" -"of Python. It has no effect in Python 3. Use newline to control\n" -"universal newlines mode.\n" -"\n" "buffering is an optional integer used to set the buffering policy.\n" "Pass 0 to switch buffering off (only allowed in binary mode), 1 to select\n" "line buffering (only usable in text mode), and an integer > 1 to indicate\n" @@ -323,4 +318,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=3df6bc6d91697545 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=680e4b488c7da8a1 input=a9049054013a1b77]*/ From 74e70768cc22869ed06ac80ae1f164f6a544c48c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 28 Oct 2019 11:49:07 +0100 Subject: [PATCH 2/3] Add more tests --- Lib/test/test_codecs.py | 8 ++++++++ Lib/test/test_io.py | 9 +++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d98c24dc74eb14..036e7e85197bf9 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -716,6 +716,14 @@ def test_bug691291(self): encoding=self.encoding) as reader: self.assertEqual(reader.read(), s1) + def test_removed_u_mode(self): + # "U" mode has been removed in Python 3.9 + for mode in ("U", "rU", "r+U"): + with self.assertRaises(ValueError) as cm: + codecs.open(support.TESTFN, mode) + self.assertIn('invalid mode', str(cm.exception)) + + class UTF16LETest(ReadTest, unittest.TestCase): encoding = "utf-16-le" ill_formed_sequence = b"\x80\xdc" diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index b967a47aea30b3..ad22dfe9b29107 100644 --- a/Lib/test/test_io.py +++ 
b/Lib/test/test_io.py @@ -3900,10 +3900,11 @@ def test_attributes(self): g.close() def test_removed_u_mode(self): - with self.assertRaises(ValueError) as cm: - # "U" mode has been removed in Python 3.9 - self.open(support.TESTFN, "U") - self.assertIn('invalid mode', str(cm.exception)) + # "U" mode has been removed in Python 3.9 + for mode in ("U", "rU", "r+U"): + with self.assertRaises(ValueError) as cm: + self.open(support.TESTFN, mode) + self.assertIn('invalid mode', str(cm.exception)) def test_io_after_close(self): for kwargs in [ From 703dc6a62834b20c9331317a5abee78058d39ddf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 28 Oct 2019 12:48:07 +0100 Subject: [PATCH 3/3] test_codecs: pass an encoding --- Lib/test/test_codecs.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 036e7e85197bf9..e1638c111681c8 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -716,13 +716,18 @@ def test_bug691291(self): encoding=self.encoding) as reader: self.assertEqual(reader.read(), s1) - def test_removed_u_mode(self): - # "U" mode has been removed in Python 3.9 - for mode in ("U", "rU", "r+U"): + def test_invalid_modes(self): + for mode in ('U', 'rU', 'r+U'): with self.assertRaises(ValueError) as cm: - codecs.open(support.TESTFN, mode) + codecs.open(support.TESTFN, mode, encoding=self.encoding) self.assertIn('invalid mode', str(cm.exception)) + for mode in ('rt', 'wt', 'at', 'r+t'): + with self.assertRaises(ValueError) as cm: + codecs.open(support.TESTFN, mode, encoding=self.encoding) + self.assertIn("can't have text and binary mode at once", + str(cm.exception)) + class UTF16LETest(ReadTest, unittest.TestCase): encoding = "utf-16-le"