From c7f768020298bf2fabb349930eb51e842d5f56a1 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Mon, 25 May 2020 12:42:54 +0200 Subject: [PATCH 1/3] bpo-17258: Stronger HMAC in multiprocessing Signed-off-by: Christian Heimes --- Lib/multiprocessing/connection.py | 80 +++++++++++++++++-- Lib/test/_test_multiprocessing.py | 32 +++++++- .../2020-05-25-12-42-36.bpo-17258.lf2554.rst | 1 + 3 files changed, 103 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 510e4b5aba44a6..1bc41f97ad6123 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -11,6 +11,7 @@ import io import os +import re import sys import socket import struct @@ -734,30 +735,93 @@ def PipeClient(address): WELCOME = b'#WELCOME#' FAILURE = b'#FAILURE#' -def deliver_challenge(connection, authkey): +_mac_algo_re = re.compile( + rb'^{(?P(md5|sha256|sha384|sha3_256|sha3_384))}' + rb'(?P.*)$' +) + +def _create_response(authkey, message): + """Create a MAC based on authkey and message + + The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or + the message has a '{digestmod}' prefix. For legacy HMAC-MD5, the response + is the raw MAC, otherwise the response is prefixed with '{digestmod}', + e.g. b'{sha256}abcdefg...' + + Note: The MAC protects the entire message including the digestmod prefix. + """ + import hmac + # message: {digest}payload, the MAC protects header and payload + mo = _mac_algo_re.match(message) + if mo is not None: + digestmod = mo.group('digestmod').decode('ascii') + else: + # old-style MD5 with fallback + digestmod = None + + if digestmod is None: + try: + return hmac.new(authkey, message, 'md5').digest() + except ValueError: + # MD5 is not available, fall back to SHA2-256 + digestmod = 'sha256' + prefix = b'{%s}' % digestmod.encode('ascii') + return prefix + hmac.new(authkey, message, digestmod).digest() + + +def _verify_challenge(authkey, message, response): + """Verify MAC challenge + + If our message did not include a digestmod prefix, the client is allowed + to select a stronger digestmod (HMAC-MD5 legacy to HMAC-SHA2-256). + + In case our message is prefixed, a client cannot downgrade to a weaker + algorithm, because the MAC is calculated over the entire message + including the '{digestmod}' prefix. + """ import hmac + mo = _mac_algo_re.match(response) + if mo is not None: + # get digestmod from response. + digestmod = mo.group('digestmod').decode('ascii') + mac = mo.group('payload') + else: + digestmod = 'md5' + mac = response + try: + expected = hmac.new(authkey, message, digestmod).digest() + except ValueError: + raise AuthenticationError(f'unsupported digest {digestmod}') + if not hmac.compare_digest(expected, mac): + raise AuthenticationError('digest received was wrong') + return True + + +def deliver_challenge(connection, authkey, digestmod=None): if not isinstance(authkey, bytes): raise ValueError( "Authkey must be bytes, not {0!s}".format(type(authkey))) message = os.urandom(MESSAGE_LENGTH) + if digestmod is not None: + message = b'{%s}%s' % (digestmod.encode('ascii'), message) connection.send_bytes(CHALLENGE + message) - digest = hmac.new(authkey, message, 'md5').digest() response = connection.recv_bytes(256) # reject large message - if response == digest: - connection.send_bytes(WELCOME) - else: + try: + _verify_challenge(authkey, message, response) + except AuthenticationError: connection.send_bytes(FAILURE) - raise AuthenticationError('digest received was wrong') + raise + else: + connection.send_bytes(WELCOME) def answer_challenge(connection, authkey): - import hmac if not isinstance(authkey, bytes): raise ValueError( "Authkey must be bytes, not {0!s}".format(type(authkey))) message = connection.recv_bytes(256) # reject large message assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message message = message[len(CHALLENGE):] - digest = hmac.new(authkey, message, 'md5').digest() + digest = _create_response(authkey, message) connection.send_bytes(digest) response = connection.recv_bytes(256) # reject large message if response != WELCOME: diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index fd3b4303f034c1..d5d7d5a0931641 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -46,6 +46,7 @@ import multiprocessing.managers import multiprocessing.pool import multiprocessing.queues +from multiprocessing.connection import wait, AuthenticationError from multiprocessing import util @@ -118,8 +119,6 @@ def _resource_unlink(name, rtype): WIN32 = (sys.platform == "win32") -from multiprocessing.connection import wait - def wait_for_handle(handle, timeout): if timeout is not None and timeout < 0.0: timeout = None @@ -4494,6 +4493,35 @@ def send_bytes(self, data): multiprocessing.connection.answer_challenge, _FakeConnection(), b'abc') + +@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') +class ChallengeResponseTest(unittest.TestCase): + authkey = b'supadupasecretkey' + + def create_response(self, message): + return multiprocessing.connection._create_response( + self.authkey, message + ) + + def verify_challenge(self, message, response): + return multiprocessing.connection._verify_challenge( + self.authkey, message, response + ) + + def test_challengeresponse(self): + for algo in [None, "md5", "sha256"]: + msg = b'mymessage' + if algo is not None: + prefix = b'{%s}' % algo.encode("ascii") + else: + prefix = b'' + msg = prefix + msg + response = self.create_response(msg) + if not response.startswith(prefix): + self.fail(response) + self.verify_challenge(msg, response) + # # Test Manager.start()/Pool.__init__() initializer feature - see issue 5585 # diff --git a/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst b/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst new file mode 100644 index 00000000000000..b06e4a3bd88015 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst @@ -0,0 +1 @@ +:mod:`multiprocessing` supports stronger HMAC algorithms From ab1d60ce36eef42bc368e91dc80f3a1317171b83 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Sun, 20 Nov 2022 21:23:01 +0000 Subject: [PATCH 2/3] Reworked to be more robust while keeping the idea. The protocol modification idea remains, but we now take advantage of the message length as an indicator of legacy vs modern protocol version. No more regular expression usage. We now default to HMAC-SHA256, but do so in a way that will be compatible when communicating with older clients or older servers. No protocol transition period is needed. More unittests to verify these claims remain true are required. --- Lib/multiprocessing/connection.py | 171 ++++++++++++------ Lib/test/_test_multiprocessing.py | 45 +++-- .../2020-05-25-12-42-36.bpo-17258.lf2554.rst | 3 +- 3 files changed, 144 insertions(+), 75 deletions(-) diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 2483d93cc7d591..e8191a1a3c10a4 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -11,7 +11,6 @@ import io import os -import re import sys import socket import struct @@ -723,11 +722,11 @@ def PipeClient(address): # Authentication stuff # -MESSAGE_LENGTH = 20 +MESSAGE_LENGTH = 40 # MUST be > 20 -CHALLENGE = b'#CHALLENGE#' -WELCOME = b'#WELCOME#' -FAILURE = b'#FAILURE#' +_CHALLENGE = b'#CHALLENGE#' +_WELCOME = b'#WELCOME#' +_FAILURE = b'#FAILURE#' # multiprocessing.connection Authentication Handshake Protocol Description # (as documented for reference after reading the existing code) @@ -751,7 +750,12 @@ def PipeClient(address): # ------------------------------ --------------------------------------- # 0. Open a connection on the pipe. # 1. Accept connection. -# 2. New random 20 bytes -> MESSAGE +# 2. Random 20+ bytes -> MESSAGE +# Modern servers always send +# more than 20 bytes and include +# a {digest} prefix on it with +# their preferred HMAC digest. +# Legacy ones send ==20 bytes. # 3. send 4 byte length (net order) # prefix followed by: # b'#CHALLENGE#' + MESSAGE @@ -764,14 +768,32 @@ def PipeClient(address): # 6. Assert that M1 starts with: # b'#CHALLENGE#' # 7. Strip that prefix from M1 into -> M2 -# 8. Compute HMAC-MD5 of AUTHKEY, M2 -> C_DIGEST +# 7.1. Parse M2: if it is exactly 20 bytes in +# length this indicates a legacy server +# supporting only HMAC-MD5. Otherwise the +# 7.2. preferred digest is looked up from an +# expected "{digest}" prefix on M2. No prefix +# or unsupported digest? <- AuthenticationError +# 7.3. Put divined algorithm name in -> D_NAME +# 8. Compute HMAC-D_NAME of AUTHKEY, M2 -> C_DIGEST # 9. Send 4 byte length prefix (net order) # followed by C_DIGEST bytes. -# 10. Compute HMAC-MD5 of AUTHKEY, -# MESSAGE into -> M_DIGEST. -# 11. Receive 4 or 4+8 byte length +# 10. Receive 4 or 4+8 byte length # prefix (#4 dance) -> SIZE. -# 12. Receive min(SIZE, 256) -> C_D. +# 11. Receive min(SIZE, 256) -> C_D. +# 11.1. Parse C_D: legacy servers +# accept it as is, "md5" -> D_NAME +# 11.2. modern servers check the length +# of C_D, IF it is 16 bytes? +# 11.2.1. "md5" -> D_NAME +# and skip to step 12. +# 11.3. longer? expect and parse a "{digest}" +# prefix into -> D_NAME. +# Strip the prefix and store remaining +# bytes in -> C_D. +# 11.4. Don't like D_NAME? <- AuthenticationError +# 12. Compute HMAC-D_NAME of AUTHKEY, +# MESSAGE into -> M_DIGEST. # 13. Compare M_DIGEST == C_D: # 14a: Match? Send length prefix & # b'#WELCOME#' @@ -797,97 +819,134 @@ def PipeClient(address): # opening challenge message as an indicator of protocol version may work. -_mac_algo_re = re.compile( - rb'^{(?P(md5|sha256|sha384|sha3_256|sha3_384))}' - rb'(?P.*)$' -) +_ALLOWED_DIGESTS = frozenset( + {b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'}) +_MAX_DIGEST_LEN = max(len(_) for _ in _ALLOWED_DIGESTS) + +# Old hmac-md5 only server versions from Python <=3.11 sent a message of this +# length. It happens to not match the length of any supported digest so we can +# use a message of this length to indicate that we should work in backwards +# compatible md5-only mode without a {digest_name} prefix on our response. +_MD5ONLY_MESSAGE_LENGTH = 20 +_MD5_DIGEST_LEN = 16 +_LEGACY_LENGTHS = (_MD5ONLY_MESSAGE_LENGTH, _MD5_DIGEST_LEN) + + +def _get_digest_name_and_payload(message: bytes) -> (str, bytes): + """Returns a digest name and the payload for a response hash. + + If a legacy protocol is detected based on the message length + or contents the digest name returned will be empty to indicate + legacy mode where MD5 and no digest prefix should be sent. + """ + # modern message format: b"{digest}payload" longer than 20 bytes + # legacy message format: 16 or 20 byte b"payload" + if len(message) in _LEGACY_LENGTHS: + # Either this was a legacy server challenge, or we're processing + # a reply from a legacy client that sent an unprefixed 16-byte + # HMAC-MD5 response. All messages using the modern protocol will + # be longer than either of these lengths. + return '', message + if (message.startswith(b'{') and + (curly := message.find(b'}', 1, _MAX_DIGEST_LEN+2)) > 0): + digest = message[1:curly] + if digest in _ALLOWED_DIGESTS: + payload = message[curly+1:] + return digest.decode('ascii'), payload + raise AuthenticationError( + 'unsupported message length, missing digest prefix, ' + f'or unsupported digest: {message=}') def _create_response(authkey, message): """Create a MAC based on authkey and message The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or - the message has a '{digestmod}' prefix. For legacy HMAC-MD5, the response - is the raw MAC, otherwise the response is prefixed with '{digestmod}', + the message has a '{digest_name}' prefix. For legacy HMAC-MD5, the response + is the raw MAC, otherwise the response is prefixed with '{digest_name}', e.g. b'{sha256}abcdefg...' - Note: The MAC protects the entire message including the digestmod prefix. + Note: The MAC protects the entire message including the digest_name prefix. """ import hmac - # message: {digest}payload, the MAC protects header and payload - mo = _mac_algo_re.match(message) - if mo is not None: - digestmod = mo.group('digestmod').decode('ascii') - else: - # old-style MD5 with fallback - digestmod = None - - if digestmod is None: + digest_name = _get_digest_name_and_payload(message)[0] + # The MAC protects the entire message: digest header and payload. + if not digest_name: + # Legacy server without a {digest} prefix on message. + # Generate a legacy non-prefixed HMAC-MD5 reply. try: return hmac.new(authkey, message, 'md5').digest() except ValueError: - # MD5 is not available, fall back to SHA2-256 - digestmod = 'sha256' - prefix = b'{%s}' % digestmod.encode('ascii') - return prefix + hmac.new(authkey, message, digestmod).digest() + # HMAC-MD5 is not available (FIPS mode?), fall back to + # HMAC-SHA2-256 modern protocol. The legacy server probably + # doesn't support it and will reject us anyways. :shrug: + digest_name = 'sha256' + # Modern protocol, indicate the digest used in the reply. + response = hmac.new(authkey, message, digest_name).digest() + return b'{%s}%s' % (digest_name.encode('ascii'), response) def _verify_challenge(authkey, message, response): """Verify MAC challenge - If our message did not include a digestmod prefix, the client is allowed - to select a stronger digestmod (HMAC-MD5 legacy to HMAC-SHA2-256). + If our message did not include a digest_name prefix, the client is allowed + to select a stronger digest_name from _ALLOWED_DIGESTS. In case our message is prefixed, a client cannot downgrade to a weaker algorithm, because the MAC is calculated over the entire message - including the '{digestmod}' prefix. + including the '{digest_name}' prefix. """ import hmac - mo = _mac_algo_re.match(response) - if mo is not None: - # get digestmod from response. - digestmod = mo.group('digestmod').decode('ascii') - mac = mo.group('payload') - else: - digestmod = 'md5' - mac = response + response_digest, response_mac = _get_digest_name_and_payload(response) + response_digest = response_digest or 'md5' try: - expected = hmac.new(authkey, message, digestmod).digest() + expected = hmac.new(authkey, message, response_digest).digest() except ValueError: - raise AuthenticationError(f'unsupported digest {digestmod}') - if not hmac.compare_digest(expected, mac): + raise AuthenticationError(f'{response_digest=} unsupported') + if len(expected) != len(response_mac): + raise AuthenticationError( + f'expected {response_digest!r} of length {len(expected)} ' + f'got {len(response_mac)}') + if not hmac.compare_digest(expected, response_mac): raise AuthenticationError('digest received was wrong') - return True -def deliver_challenge(connection, authkey, digestmod=None): +def deliver_challenge(connection, authkey: bytes, digest_name='sha256'): if not isinstance(authkey, bytes): raise ValueError( "Authkey must be bytes, not {0!s}".format(type(authkey))) + assert MESSAGE_LENGTH > _MD5ONLY_MESSAGE_LENGTH, "protocol constraint" message = os.urandom(MESSAGE_LENGTH) - if digestmod is not None: - message = b'{%s}%s' % (digestmod.encode('ascii'), message) - connection.send_bytes(CHALLENGE + message) + message = b'{%s}%s' % (digest_name.encode('ascii'), message) + # Even when sending a challenge to a legacy client that does not support + # digest prefixes, they'll take the entire thing as a challenge and + # respond to it with a raw HMAC-MD5. + connection.send_bytes(_CHALLENGE + message) response = connection.recv_bytes(256) # reject large message try: _verify_challenge(authkey, message, response) except AuthenticationError: - connection.send_bytes(FAILURE) + connection.send_bytes(_FAILURE) raise else: - connection.send_bytes(WELCOME) + connection.send_bytes(_WELCOME) + -def answer_challenge(connection, authkey): +def answer_challenge(connection, authkey: bytes): if not isinstance(authkey, bytes): raise ValueError( "Authkey must be bytes, not {0!s}".format(type(authkey))) message = connection.recv_bytes(256) # reject large message - assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message - message = message[len(CHALLENGE):] + if not message.startswith(_CHALLENGE): + raise AuthenticationError( + f'Protocol error, expected challenge: {message=}') + message = message[len(_CHALLENGE):] + if len(message) < _MD5ONLY_MESSAGE_LENGTH: + raise AuthenticationError('challenge too short: {len(message)} bytes') digest = _create_response(authkey, message) connection.send_bytes(digest) response = connection.recv_bytes(256) # reject large message - if response != WELCOME: + if response != _WELCOME: raise AuthenticationError('digest sent was rejected') # diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index e4c7af49723868..1671b04413846d 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -3041,7 +3041,7 @@ def test_remote(self): del queue -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class _TestManagerRestart(BaseTestCase): @classmethod @@ -3530,7 +3530,7 @@ def test_dont_merge(self): # @unittest.skipUnless(HAS_REDUCTION, "test needs multiprocessing.reduction") -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class _TestPicklingConnections(BaseTestCase): ALLOWED_TYPES = ('processes',) @@ -3833,7 +3833,7 @@ def test_copy(self): @unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory") -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class _TestSharedMemory(BaseTestCase): ALLOWED_TYPES = ('processes',) @@ -4635,7 +4635,7 @@ def test_invalid_handles(self): -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class OtherTest(unittest.TestCase): # TODO: add more tests for deliver/answer challenge. def test_deliver_challenge_auth_failure(self): @@ -4655,7 +4655,7 @@ def __init__(self): def recv_bytes(self, size): self.count += 1 if self.count == 1: - return multiprocessing.connection.CHALLENGE + return multiprocessing.connection._CHALLENGE elif self.count == 2: return b'something bogus' return b'' @@ -4683,16 +4683,25 @@ def verify_challenge(self, message, response): def test_challengeresponse(self): for algo in [None, "md5", "sha256"]: - msg = b'mymessage' - if algo is not None: - prefix = b'{%s}' % algo.encode("ascii") - else: - prefix = b'' - msg = prefix + msg - response = self.create_response(msg) - if not response.startswith(prefix): - self.fail(response) - self.verify_challenge(msg, response) + with self.subTest(f"{algo=}"): + msg = b'is-twenty-bytes-long' # The length of a legacy message. + if algo: + prefix = b'{%s}' % algo.encode("ascii") + else: + prefix = b'' + msg = prefix + msg + response = self.create_response(msg) + if not response.startswith(prefix): + self.fail(response) + self.verify_challenge(msg, response) + + # TODO(gpshead): We need integration tests for handshakes between modern + # deliver_challenge() and verify_response() code and connections running a + # test-local copy of the legacy Python <=3.11 implementations. + + # TODO(gpshead): properly annotate tests for requires_hashdigest rather than + # only running these on a platform supporting everything. otherwise logic + # issues preventing it from working on FIPS mode setups will be hidden. # # Test Manager.start()/Pool.__init__() initializer feature - see issue 5585 @@ -4701,7 +4710,7 @@ def test_challengeresponse(self): def initializer(ns): ns.test += 1 -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class TestInitializers(unittest.TestCase): def setUp(self): self.mgr = multiprocessing.Manager() @@ -5561,7 +5570,7 @@ def is_alive(self): any(process.is_alive() for process in forked_processes)) -@hashlib_helper.requires_hashdigest('md5') +@hashlib_helper.requires_hashdigest('sha256') class TestSyncManagerTypes(unittest.TestCase): """Test all the types which can be shared between a parent and a child process by using a manager which acts as an intermediary @@ -5993,7 +6002,7 @@ def install_tests_in_module_dict(remote_globs, start_method): class Temp(base, Mixin, unittest.TestCase): pass if type_ == 'manager': - Temp = hashlib_helper.requires_hashdigest('md5')(Temp) + Temp = hashlib_helper.requires_hashdigest('sha256')(Temp) Temp.__name__ = Temp.__qualname__ = newname Temp.__module__ = __module__ remote_globs[newname] = Temp diff --git a/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst b/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst index b06e4a3bd88015..18ebd6e140cff7 100644 --- a/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst +++ b/Misc/NEWS.d/next/Library/2020-05-25-12-42-36.bpo-17258.lf2554.rst @@ -1 +1,2 @@ -:mod:`multiprocessing` supports stronger HMAC algorithms +:mod:`multiprocessing` now supports stronger HMAC algorithms for inter-process +connection authentication rather than only HMAC-MD5. From ee5e6ff94c00a564b09c814cf4b73f17fb3a6c52 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 10 Dec 2022 16:37:33 -0800 Subject: [PATCH 3/3] further update to the comment to match impl. --- Lib/multiprocessing/connection.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index e8191a1a3c10a4..04eaea811cfbbe 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -810,14 +810,10 @@ def PipeClient(address): # # If this RETURNed, the connection remains open: it has been authenticated. # -# Length prefixes are used consistently even though every step so far has -# always been a singular specific fixed length. This may help us evolve -# the protocol in the future without breaking backwards compatibility. -# -# Similarly the initial challenge message from the serving side has always -# been 20 bytes, but clients can accept a 100+ so using the length of the -# opening challenge message as an indicator of protocol version may work. - +# Length prefixes are used consistently. Even on the legacy protocol, this +# was good fortune and allowed us to evolve the protocol by using the length +# of the opening challenge or length of the returned digest as a signal as +# to which protocol the other end supports. _ALLOWED_DIGESTS = frozenset( {b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'})