From 496bbec0cc47a3eb6a5bcb7d43d6f22e5344906e Mon Sep 17 00:00:00 2001 From: "Joe S. Boyle" Date: Wed, 8 Mar 2023 20:11:45 +0000 Subject: [PATCH 1/5] Refactor the mime audio module --- Lib/email/mime/audio.py | 62 ++++++----------------------------------- 1 file changed, 8 insertions(+), 54 deletions(-) diff --git a/Lib/email/mime/audio.py b/Lib/email/mime/audio.py index 065819b2a2101d..a6d23ad7b74d0b 100644 --- a/Lib/email/mime/audio.py +++ b/Lib/email/mime/audio.py @@ -6,7 +6,6 @@ __all__ = ['MIMEAudio'] -from io import BytesIO from email import encoders from email.mime.nonmultipart import MIMENonMultipart @@ -36,65 +35,20 @@ def __init__(self, _audiodata, _subtype=None, constructor, which turns them into parameters on the Content-Type header. """ - if _subtype is None: - _subtype = _what(_audiodata) - if _subtype is None: - raise TypeError('Could not find audio MIME subtype') + _subtype = _subtype or _infer_subtype(_audiodata) MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy, **_params) self.set_payload(_audiodata) _encoder(self) -_rules = [] - - -# Originally from the sndhdr module. -# -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _what(data): - # Try to identify a sound file type. - # - # sndhdr.what() had a pretty cruddy interface, unfortunately. This is why - # we re-do it here. It would be easier to reverse engineer the Unix 'file' - # command and use the standard 'magic' file, as shipped with a modern Unix. - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in _rules: - if res := testfn(hdr, fakefile): - return res - else: - return None - - -def rule(rulefunc): - _rules.append(rulefunc) - return rulefunc - - -@rule -def _aiff(h, f): - if not h.startswith(b'FORM'): - return None - if h[8:12] in {b'AIFC', b'AIFF'}: +def _infer_subtype(h: bytes) -> str: + if h.startswith(b'FORM') and h[8:12] in (b'AIFC', b'AIFF'): return 'x-aiff' - else: - return None - - -@rule -def _au(h, f): - if h.startswith(b'.snd'): + elif h.startswith(b'.snd'): return 'basic' - else: - return None - + elif h.startswith(b'RIFF') and h[8:12] == b'WAVE' and h[12:16] == b'fmt ': + # 'RIFF' 'WAVE' 'fmt ' + return 'x-wav' -@rule -def _wav(h, f): - # 'RIFF' 'WAVE' 'fmt ' - if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': - return None - else: - return "x-wav" + raise TypeError('Could not find audio MIME subtype') From 3561ee9f6f563ea75553f90cc6d687d852b6b051 Mon Sep 17 00:00:00 2001 From: "Joe S. Boyle" Date: Wed, 8 Mar 2023 21:49:23 +0000 Subject: [PATCH 2/5] Improve comparisons in _infer_subtype --- Lib/email/mime/audio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/email/mime/audio.py b/Lib/email/mime/audio.py index a6d23ad7b74d0b..bf3265448fcb6a 100644 --- a/Lib/email/mime/audio.py +++ b/Lib/email/mime/audio.py @@ -43,12 +43,12 @@ def __init__(self, _audiodata, _subtype=None, def _infer_subtype(h: bytes) -> str: + """Infer the audio format based on the data's header""" if h.startswith(b'FORM') and h[8:12] in (b'AIFC', b'AIFF'): return 'x-aiff' elif h.startswith(b'.snd'): return 'basic' - elif h.startswith(b'RIFF') and h[8:12] == b'WAVE' and h[12:16] == b'fmt ': - # 'RIFF' 'WAVE' 'fmt ' + elif h.startswith(b'RIFF') and h[8:16] == b'WAVEfmt ': return 'x-wav' raise TypeError('Could not find audio MIME subtype') From 15cd8ad98a1808d3ce09ac9db793ba14a0218888 Mon Sep 17 00:00:00 2001 From: "Joe S. Boyle" Date: Thu, 9 Mar 2023 19:48:01 +0000 Subject: [PATCH 3/5] Add a test for inferring the audio content subtypes --- Lib/test/test_email/test_email.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 44b405740c4403..22cce9b7ab8ac8 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -1563,6 +1563,18 @@ def test_add_header(self): self.assertIs(self._au.get_param('attachment', missing, header='foobar'), missing) + def test_infer_audio_content_subtypes(self): + data_to_subtype = { + b'FORM AIFC' : 'x-aiff', + b'FORM AIFF' : 'x-aiff', + b'.snd' : 'basic', + b'.snd1orem1ps' : 'basic', # the chars after '.snd' don't matter. + b'RIFF WAVEfmt ': 'x-wav', # trailing space is intentional. + } + + for audiodata, subtype in data_to_subtype.items(): + au = MIMEAudio(audiodata) + self.assertEqual(au.get_content_subtype(), subtype) # Test the basic MIMEImage class From 5b26332a74ded30aab6e1f8b60451b0402c0f377 Mon Sep 17 00:00:00 2001 From: "Joe S. Boyle" Date: Thu, 9 Mar 2023 19:56:09 +0000 Subject: [PATCH 4/5] Make the infer audio content subtypes test more robust --- Lib/test/test_email/test_email.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 22cce9b7ab8ac8..2df1453328b4d8 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -1567,11 +1567,16 @@ def test_infer_audio_content_subtypes(self): data_to_subtype = { b'FORM AIFC' : 'x-aiff', b'FORM AIFF' : 'x-aiff', + b'RIFF WAVEfmt ': 'x-wav', b'.snd' : 'basic', - b'.snd1orem1ps' : 'basic', # the chars after '.snd' don't matter. - b'RIFF WAVEfmt ': 'x-wav', # trailing space is intentional. - } + # Check that the 4 intermediate chars have no effect + b'FORMabcdAIFC' : 'x-aiff', + b'FORMzyxwAIFF' : 'x-aiff', + b'RIFF1234WAVEfmt ': 'x-wav', + # Check that anything after the first 4 chars has no effect + b'.sndFOOBARZ' : 'basic', + } for audiodata, subtype in data_to_subtype.items(): au = MIMEAudio(audiodata) self.assertEqual(au.get_content_subtype(), subtype) From 1b4b0617745831e2aee1449c36dbd16117b5b288 Mon Sep 17 00:00:00 2001 From: "Joe S. Boyle" Date: Sat, 11 Mar 2023 18:57:52 +0000 Subject: [PATCH 5/5] Remove newly added redundant tests --- Lib/test/test_email/test_email.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 2df1453328b4d8..44b405740c4403 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -1563,23 +1563,6 @@ def test_add_header(self): self.assertIs(self._au.get_param('attachment', missing, header='foobar'), missing) - def test_infer_audio_content_subtypes(self): - data_to_subtype = { - b'FORM AIFC' : 'x-aiff', - b'FORM AIFF' : 'x-aiff', - b'RIFF WAVEfmt ': 'x-wav', - b'.snd' : 'basic', - - # Check that the 4 intermediate chars have no effect - b'FORMabcdAIFC' : 'x-aiff', - b'FORMzyxwAIFF' : 'x-aiff', - b'RIFF1234WAVEfmt ': 'x-wav', - # Check that anything after the first 4 chars has no effect - b'.sndFOOBARZ' : 'basic', - } - for audiodata, subtype in data_to_subtype.items(): - au = MIMEAudio(audiodata) - self.assertEqual(au.get_content_subtype(), subtype) # Test the basic MIMEImage class