From e6d5880828321601666a4aae551a5942998728b3 Mon Sep 17 00:00:00 2001 From: fsc-eriker <72394365+fsc-eriker@users.noreply.github.com> Date: Sat, 19 Aug 2023 14:14:35 +0300 Subject: [PATCH 1/5] email._header_value_parser: prevent IndexError in get_obs_local_part (GH-105802) --- Lib/email/_header_value_parser.py | 3 +++ Lib/test/test_email/test__header_value_parser.py | 5 +++++ .../Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst | 2 ++ 3 files changed, 10 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 0d6bd812475eea..d999d26533fb91 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1513,6 +1513,9 @@ def get_obs_local_part(value): raise token, value = get_cfws(value) obs_local_part.append(token) + if not obs_local_part: + raise(errors.HeaderParseError( + "abandoning parse; truncated value? ({})".format(value))) if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and obs_local_part[1].token_type=='dot'): diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 854f2ff009c618..3bb3067262c892 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2588,6 +2588,11 @@ def test_get_msg_id_empty(self): with self.assertRaises(errors.HeaderParseError): parser.get_msg_id('') + def test_get_msg_id_botched(self): + # gh-105802: ditto for broken Microsoft Message-Id + with self.assertRaises(errors.HeaderParseError): + parser.get_msg_id('<[83c48dddbea7492e873224a5ae1c04be-JFBVALKQOJXWILKNK4YVA7CBPJ2XEZKEMV3E64DTPRCW2YLJNR6EK6DPKNWXI4A=@microsoft.com]>') + def test_get_msg_id_valid(self): msg_id = self._test_get_x( parser.get_msg_id, diff --git a/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst b/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst new file mode 100644 index 00000000000000..cf5ac8b971890f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst @@ -0,0 +1,2 @@ +In `:mod:`email` library, avoid another IndexError in Message-Id parsing for +improper tokens; fixes <[broken@microsoft.com]> case From bf6d7b5e906219d67c4a43981cb7ff9992d0c0ec Mon Sep 17 00:00:00 2001 From: fsc-eriker <72394365+fsc-eriker@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:55:29 +0200 Subject: [PATCH 2/5] Fix formatting error in blurb --- .../next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst b/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst index cf5ac8b971890f..a3da1ecebd8b5a 100644 --- a/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst +++ b/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst @@ -1,2 +1,2 @@ -In `:mod:`email` library, avoid another IndexError in Message-Id parsing for +In :mod:`email` library, avoid another IndexError in Message-Id parsing for improper tokens; fixes <[broken@microsoft.com]> case From a614ff14332069872c937b6d5ee268e8cb980e51 Mon Sep 17 00:00:00 2001 From: fsc-eriker <72394365+fsc-eriker@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:45:58 +0200 Subject: [PATCH 3/5] Update for GH-104802: register a defect instead of raising an error Based on PR review --- Lib/email/_header_value_parser.py | 17 +++++++++-------- .../test_email/test__header_value_parser.py | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index c24cd3a97666bb..62300870069350 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1514,18 +1514,19 @@ def get_obs_local_part(value): token, value = get_cfws(value) obs_local_part.append(token) if not obs_local_part: - raise(errors.HeaderParseError( - "abandoning parse; truncated value? ({})".format(value))) - if (obs_local_part[0].token_type == 'dot' or + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "abandoned parse; truncated value?")) + else: + if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and obs_local_part[1].token_type=='dot'): - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "Invalid leading '.' in local part")) - if (obs_local_part[-1].token_type == 'dot' or + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "Invalid leading '.' in local part")) + if (obs_local_part[-1].token_type == 'dot' or obs_local_part[-1].token_type=='cfws' and obs_local_part[-2].token_type=='dot'): - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "Invalid trailing '.' in local part")) + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "Invalid trailing '.' in local part")) if obs_local_part.defects: obs_local_part.token_type = 'invalid-obs-local-part' return obs_local_part, value diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index f7b858c9c2fc71..7327f91d1224af 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2588,10 +2588,21 @@ def test_get_msg_id_empty(self): with self.assertRaises(errors.HeaderParseError): parser.get_msg_id('') - def test_get_msg_id_botched(self): - # gh-105802: ditto for broken Microsoft Message-Id - with self.assertRaises(errors.HeaderParseError): - parser.get_msg_id('<[83c48dddbea7492e873224a5ae1c04be-JFBVALKQOJXWILKNK4YVA7CBPJ2XEZKEMV3E64DTPRCW2YLJNR6EK6DPKNWXI4A=@microsoft.com]>') + def test_get_msg_id_square_brackets(self): + # gh-105802: test for broken Microsoft Message-Id with square brackets. + msg_id = self._test_get_x( + parser.get_msg_id, + '<[TeRriBlyLongBase64==@microsoft.com]>', + '<', # sic + '<', # sic + # This also triggers + # ObsoleteHeaderDefect('obsolete id-left in msg-id') + # and InvalidHeaderDefect('msg-id with no id-right') + [errors.ObsoleteHeaderDefect, errors.InvalidHeaderDefect, + errors.InvalidHeaderDefect], + '[TeRriBlyLongBase64==@microsoft.com]>', + ) + self.assertEqual(msg_id.token_type,'msg-id') def test_get_msg_id_valid(self): msg_id = self._test_get_x( From 73b050ec6cbeeef0429d64a02485464f22d39a6a Mon Sep 17 00:00:00 2001 From: fsc-eriker <72394365+fsc-eriker@users.noreply.github.com> Date: Fri, 2 Feb 2024 08:27:37 +0200 Subject: [PATCH 4/5] Tighten up obs_local_part checks further to avoid IndexError Also, reformat slightly to make the indentation more obvious --- Lib/email/_header_value_parser.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 62300870069350..c448fd07dfcca1 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1517,14 +1517,16 @@ def get_obs_local_part(value): obs_local_part.defects.append(errors.InvalidHeaderDefect( "abandoned parse; truncated value?")) else: - if (obs_local_part[0].token_type == 'dot' or - obs_local_part[0].token_type=='cfws' and - obs_local_part[1].token_type=='dot'): + if (obs_local_part[0].token_type == 'dot' + or (obs_local_part[0].token_type=='cfws' + and len(obs_local_part) == 1 + or obs_local_part[1].token_type=='dot')): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid leading '.' in local part")) - if (obs_local_part[-1].token_type == 'dot' or - obs_local_part[-1].token_type=='cfws' and - obs_local_part[-2].token_type=='dot'): + if (obs_local_part[-1].token_type == 'dot' + or (obs_local_part[-1].token_type=='cfws' + and len(obs_local_part) == 1 + or obs_local_part[-2].token_type=='dot')): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid trailing '.' in local part")) if obs_local_part.defects: From 03efb0dbc915318da90fa88011bdb8f12cd3688c Mon Sep 17 00:00:00 2001 From: fsc-eriker <72394365+fsc-eriker@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:27:51 +0200 Subject: [PATCH 5/5] Update for GH-104802: don't register dot defect if there is a single cwf Don't add new error cases when refactoring; simply ensure that the code avoids an IndexError for the cases it attempts to handle --- Lib/email/_header_value_parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index c448fd07dfcca1..ce4776cfc3e20a 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1519,14 +1519,14 @@ def get_obs_local_part(value): else: if (obs_local_part[0].token_type == 'dot' or (obs_local_part[0].token_type=='cfws' - and len(obs_local_part) == 1 - or obs_local_part[1].token_type=='dot')): + and len(obs_local_part) > 1 + and obs_local_part[1].token_type=='dot')): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid leading '.' in local part")) if (obs_local_part[-1].token_type == 'dot' or (obs_local_part[-1].token_type=='cfws' - and len(obs_local_part) == 1 - or obs_local_part[-2].token_type=='dot')): + and len(obs_local_part) > 1 + and obs_local_part[-2].token_type=='dot')): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid trailing '.' in local part")) if obs_local_part.defects: