diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 5b653f66c18554..ce4776cfc3e20a 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1513,16 +1513,22 @@ def get_obs_local_part(value): raise token, value = get_cfws(value) obs_local_part.append(token) - if (obs_local_part[0].token_type == 'dot' or - obs_local_part[0].token_type=='cfws' and - obs_local_part[1].token_type=='dot'): + if not obs_local_part: obs_local_part.defects.append(errors.InvalidHeaderDefect( - "Invalid leading '.' in local part")) - if (obs_local_part[-1].token_type == 'dot' or - obs_local_part[-1].token_type=='cfws' and - obs_local_part[-2].token_type=='dot'): - obs_local_part.defects.append(errors.InvalidHeaderDefect( - "Invalid trailing '.' in local part")) + "abandoned parse; truncated value?")) + else: + if (obs_local_part[0].token_type == 'dot' + or (obs_local_part[0].token_type=='cfws' + and len(obs_local_part) > 1 + and obs_local_part[1].token_type=='dot')): + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "Invalid leading '.' in local part")) + if (obs_local_part[-1].token_type == 'dot' + or (obs_local_part[-1].token_type=='cfws' + and len(obs_local_part) > 1 + and obs_local_part[-2].token_type=='dot')): + obs_local_part.defects.append(errors.InvalidHeaderDefect( + "Invalid trailing '.' in local part")) if obs_local_part.defects: obs_local_part.token_type = 'invalid-obs-local-part' return obs_local_part, value diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index bdb0e55f21069f..7327f91d1224af 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2588,6 +2588,22 @@ def test_get_msg_id_empty(self): with self.assertRaises(errors.HeaderParseError): parser.get_msg_id('') + def test_get_msg_id_square_brackets(self): + # gh-105802: test for broken Microsoft Message-Id with square brackets. + msg_id = self._test_get_x( + parser.get_msg_id, + '<[TeRriBlyLongBase64==@microsoft.com]>', + '<', # sic + '<', # sic + # This also triggers + # ObsoleteHeaderDefect('obsolete id-left in msg-id') + # and InvalidHeaderDefect('msg-id with no id-right') + [errors.ObsoleteHeaderDefect, errors.InvalidHeaderDefect, + errors.InvalidHeaderDefect], + '[TeRriBlyLongBase64==@microsoft.com]>', + ) + self.assertEqual(msg_id.token_type,'msg-id') + def test_get_msg_id_valid(self): msg_id = self._test_get_x( parser.get_msg_id, diff --git a/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst b/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst new file mode 100644 index 00000000000000..a3da1ecebd8b5a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-19-13-48-11.gh-issue-105802.0p286F.rst @@ -0,0 +1,2 @@ +In :mod:`email` library, avoid another IndexError in Message-Id parsing for +improper tokens; fixes <[broken@microsoft.com]> case