From 622b932af468340bfe3fe3befba7d5fe593e98d7 Mon Sep 17 00:00:00 2001 From: Julien Castiaux Date: Mon, 22 Nov 2021 18:34:22 +0100 Subject: [PATCH 1/2] bpo-44637: Fix DBQuote mail header refold When a header's content is too long, the RFC requires folding it over multiple lines. Each continuation line starts with a space to distinguish folded lines from regular ones. Folding a line requires splitting it, and there are only a few sweet spots where it is possible to do so (e.g. between two words). Words are pretty deep in the parse-tree, thus multiple parts must be unwrapped to reveal them. One of those parts can be a quoted-string; printing a quoted-string as a whole correctly wraps its content with double-quotes, but printing every child never quotes them. When a quoted-string must be unwrapped to find a sweet spot to split the line, we now inject double-quote literals before and after its children. --- Lib/email/_header_value_parser.py | 9 ++++++++- Lib/test/test_email/test_email.py | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 51d355fbb0abc5..8bf9edbfd691fa 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2841,7 +2841,14 @@ def _refold_parse_tree(parse_tree, *, policy): continue if not hasattr(part, 'encode'): # It's not a terminal, try folding the subparts. 
- newparts = list(part) + if part.token_type == 'bare-quoted-string': + newparts = [ + ValueTerminal('"', 'DQUOTE'), + *list(part), + ValueTerminal('"', 'DQUOTE'), + ] + else: + newparts = list(part) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index a3ccbbbabfb328..945e01cff3b88e 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -1431,6 +1431,13 @@ def test_long_rfc2047_header_with_embedded_fws(self): =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?= =?utf-8?q?_folding_white_space_works?=""")+'\n') + def test_long_quoted_string_header(self): + msg = Message(policy=email.policy.default) + msg['To'] = '"John Doe Example Inc. Houtesiplou Belgium" ' + self.assertEqual( + msg.as_string(maxheaderlen=40), + 'To: "John Doe Example Inc. Houtesiplou\n Belgium" \n\n', + ) # Test mangling of "From " lines in the body of a message From 38ca1c2875839fb792f89460571e1c1db2cad9fa Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 19 Apr 2022 10:38:02 +0000 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Security/2022-04-19-10-38-00.gh-issue-88803.TMtK9R.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Security/2022-04-19-10-38-00.gh-issue-88803.TMtK9R.rst diff --git a/Misc/NEWS.d/next/Security/2022-04-19-10-38-00.gh-issue-88803.TMtK9R.rst b/Misc/NEWS.d/next/Security/2022-04-19-10-38-00.gh-issue-88803.TMtK9R.rst new file mode 100644 index 00000000000000..727bc466002d99 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-04-19-10-38-00.gh-issue-88803.TMtK9R.rst @@ -0,0 +1 @@ +When a mail header content is too long, the RFC demands to fold it over multiple lines, finding 
a sweet spot (e.g. between two words) at which to split the line. When the sweet spot was inside a quoted-string, the quotation marks were wrongly removed, which led to invalid header values.