Skip to content

Commit a4f4627

Browse files
authored
Merge pull request #31 from DataDog/backport-3ed57e4-3.10
[3.10] pythongh-61460: Stronger HMAC in multiprocessing (pythonGH-20380)
2 parents 8773554 + 42f8222 commit a4f4627

File tree

3 files changed

+254
-27
lines changed

3 files changed

+254
-27
lines changed

Lib/multiprocessing/connection.py

Lines changed: 205 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -723,39 +723,227 @@ def PipeClient(address):
723723
# Authentication stuff
724724
#
725725

726-
MESSAGE_LENGTH = 20
726+
MESSAGE_LENGTH = 40 # MUST be > 20
727727

728-
CHALLENGE = b'#CHALLENGE#'
729-
WELCOME = b'#WELCOME#'
730-
FAILURE = b'#FAILURE#'
728+
_CHALLENGE = b'#CHALLENGE#'
729+
_WELCOME = b'#WELCOME#'
730+
_FAILURE = b'#FAILURE#'
731731

732-
def deliver_challenge(connection, authkey):
732+
# multiprocessing.connection Authentication Handshake Protocol Description
733+
# (as documented for reference after reading the existing code)
734+
# =============================================================================
735+
#
736+
# On Windows: native pipes with "overlapped IO" are used to send the bytes,
737+
# instead of the length prefix SIZE scheme described below. (ie: the OS deals
738+
# with message sizes for us)
739+
#
740+
# Protocol error behaviors:
741+
#
742+
# On POSIX, any failure to receive the length prefix into SIZE, for SIZE greater
743+
# than the requested maxsize to receive, or receiving fewer than SIZE bytes
744+
# results in the connection being closed and auth to fail.
745+
#
746+
# On Windows, receiving too few bytes is never a low level _recv_bytes read
747+
# error, receiving too many will trigger an error only if receive maxsize
748+
# value was larger than 128 OR the if the data arrived in smaller pieces.
749+
#
750+
# Serving side Client side
751+
# ------------------------------ ---------------------------------------
752+
# 0. Open a connection on the pipe.
753+
# 1. Accept connection.
754+
# 2. Random 20+ bytes -> MESSAGE
755+
# Modern servers always send
756+
# more than 20 bytes and include
757+
# a {digest} prefix on it with
758+
# their preferred HMAC digest.
759+
# Legacy ones send ==20 bytes.
760+
# 3. send 4 byte length (net order)
761+
# prefix followed by:
762+
# b'#CHALLENGE#' + MESSAGE
763+
# 4. Receive 4 bytes, parse as network byte
764+
# order integer. If it is -1, receive an
765+
# additional 8 bytes, parse that as network
766+
# byte order. The result is the length of
767+
# the data that follows -> SIZE.
768+
# 5. Receive min(SIZE, 256) bytes -> M1
769+
# 6. Assert that M1 starts with:
770+
# b'#CHALLENGE#'
771+
# 7. Strip that prefix from M1 into -> M2
772+
# 7.1. Parse M2: if it is exactly 20 bytes in
773+
# length this indicates a legacy server
774+
# supporting only HMAC-MD5. Otherwise the
775+
# 7.2. preferred digest is looked up from an
776+
# expected "{digest}" prefix on M2. No prefix
777+
# or unsupported digest? <- AuthenticationError
778+
# 7.3. Put divined algorithm name in -> D_NAME
779+
# 8. Compute HMAC-D_NAME of AUTHKEY, M2 -> C_DIGEST
780+
# 9. Send 4 byte length prefix (net order)
781+
# followed by C_DIGEST bytes.
782+
# 10. Receive 4 or 4+8 byte length
783+
# prefix (#4 dance) -> SIZE.
784+
# 11. Receive min(SIZE, 256) -> C_D.
785+
# 11.1. Parse C_D: legacy servers
786+
# accept it as is, "md5" -> D_NAME
787+
# 11.2. modern servers check the length
788+
# of C_D, IF it is 16 bytes?
789+
# 11.2.1. "md5" -> D_NAME
790+
# and skip to step 12.
791+
# 11.3. longer? expect and parse a "{digest}"
792+
# prefix into -> D_NAME.
793+
# Strip the prefix and store remaining
794+
# bytes in -> C_D.
795+
# 11.4. Don't like D_NAME? <- AuthenticationError
796+
# 12. Compute HMAC-D_NAME of AUTHKEY,
797+
# MESSAGE into -> M_DIGEST.
798+
# 13. Compare M_DIGEST == C_D:
799+
# 14a: Match? Send length prefix &
800+
# b'#WELCOME#'
801+
# <- RETURN
802+
# 14b: Mismatch? Send len prefix &
803+
# b'#FAILURE#'
804+
# <- CLOSE & AuthenticationError
805+
# 15. Receive 4 or 4+8 byte length prefix (net
806+
# order) again as in #4 into -> SIZE.
807+
# 16. Receive min(SIZE, 256) bytes -> M3.
808+
# 17. Compare M3 == b'#WELCOME#':
809+
# 17a. Match? <- RETURN
810+
# 17b. Mismatch? <- CLOSE & AuthenticationError
811+
#
812+
# If this RETURNed, the connection remains open: it has been authenticated.
813+
#
814+
# Length prefixes are used consistently. Even on the legacy protocol, this
815+
# was good fortune and allowed us to evolve the protocol by using the length
816+
# of the opening challenge or length of the returned digest as a signal as
817+
# to which protocol the other end supports.
818+
819+
_ALLOWED_DIGESTS = frozenset(
820+
{b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'})
821+
_MAX_DIGEST_LEN = max(len(_) for _ in _ALLOWED_DIGESTS)
822+
823+
# Old hmac-md5 only server versions from Python <=3.11 sent a message of this
824+
# length. It happens to not match the length of any supported digest so we can
825+
# use a message of this length to indicate that we should work in backwards
826+
# compatible md5-only mode without a {digest_name} prefix on our response.
827+
_MD5ONLY_MESSAGE_LENGTH = 20
828+
_MD5_DIGEST_LEN = 16
829+
_LEGACY_LENGTHS = (_MD5ONLY_MESSAGE_LENGTH, _MD5_DIGEST_LEN)
830+
831+
832+
def _get_digest_name_and_payload(message: bytes) -> (str, bytes):
833+
"""Returns a digest name and the payload for a response hash.
834+
835+
If a legacy protocol is detected based on the message length
836+
or contents the digest name returned will be empty to indicate
837+
legacy mode where MD5 and no digest prefix should be sent.
838+
"""
839+
# modern message format: b"{digest}payload" longer than 20 bytes
840+
# legacy message format: 16 or 20 byte b"payload"
841+
if len(message) in _LEGACY_LENGTHS:
842+
# Either this was a legacy server challenge, or we're processing
843+
# a reply from a legacy client that sent an unprefixed 16-byte
844+
# HMAC-MD5 response. All messages using the modern protocol will
845+
# be longer than either of these lengths.
846+
return '', message
847+
if (message.startswith(b'{') and
848+
(curly := message.find(b'}', 1, _MAX_DIGEST_LEN+2)) > 0):
849+
digest = message[1:curly]
850+
if digest in _ALLOWED_DIGESTS:
851+
payload = message[curly+1:]
852+
return digest.decode('ascii'), payload
853+
raise AuthenticationError(
854+
'unsupported message length, missing digest prefix, '
855+
f'or unsupported digest: {message=}')
856+
857+
858+
def _create_response(authkey, message):
859+
"""Create a MAC based on authkey and message
860+
861+
The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or
862+
the message has a '{digest_name}' prefix. For legacy HMAC-MD5, the response
863+
is the raw MAC, otherwise the response is prefixed with '{digest_name}',
864+
e.g. b'{sha256}abcdefg...'
865+
866+
Note: The MAC protects the entire message including the digest_name prefix.
867+
"""
733868
import hmac
869+
digest_name = _get_digest_name_and_payload(message)[0]
870+
# The MAC protects the entire message: digest header and payload.
871+
if not digest_name:
872+
# Legacy server without a {digest} prefix on message.
873+
# Generate a legacy non-prefixed HMAC-MD5 reply.
874+
try:
875+
return hmac.new(authkey, message, 'md5').digest()
876+
except ValueError:
877+
# HMAC-MD5 is not available (FIPS mode?), fall back to
878+
# HMAC-SHA2-256 modern protocol. The legacy server probably
879+
# doesn't support it and will reject us anyways. :shrug:
880+
digest_name = 'sha256'
881+
# Modern protocol, indicate the digest used in the reply.
882+
response = hmac.new(authkey, message, digest_name).digest()
883+
return b'{%s}%s' % (digest_name.encode('ascii'), response)
884+
885+
886+
def _verify_challenge(authkey, message, response):
887+
"""Verify MAC challenge
888+
889+
If our message did not include a digest_name prefix, the client is allowed
890+
to select a stronger digest_name from _ALLOWED_DIGESTS.
891+
892+
In case our message is prefixed, a client cannot downgrade to a weaker
893+
algorithm, because the MAC is calculated over the entire message
894+
including the '{digest_name}' prefix.
895+
"""
896+
import hmac
897+
response_digest, response_mac = _get_digest_name_and_payload(response)
898+
response_digest = response_digest or 'md5'
899+
try:
900+
expected = hmac.new(authkey, message, response_digest).digest()
901+
except ValueError:
902+
raise AuthenticationError(f'{response_digest=} unsupported')
903+
if len(expected) != len(response_mac):
904+
raise AuthenticationError(
905+
f'expected {response_digest!r} of length {len(expected)} '
906+
f'got {len(response_mac)}')
907+
if not hmac.compare_digest(expected, response_mac):
908+
raise AuthenticationError('digest received was wrong')
909+
910+
911+
def deliver_challenge(connection, authkey: bytes, digest_name='sha256'):
734912
if not isinstance(authkey, bytes):
735913
raise ValueError(
736914
"Authkey must be bytes, not {0!s}".format(type(authkey)))
915+
assert MESSAGE_LENGTH > _MD5ONLY_MESSAGE_LENGTH, "protocol constraint"
737916
message = os.urandom(MESSAGE_LENGTH)
738-
connection.send_bytes(CHALLENGE + message)
739-
digest = hmac.new(authkey, message, 'md5').digest()
917+
message = b'{%s}%s' % (digest_name.encode('ascii'), message)
918+
# Even when sending a challenge to a legacy client that does not support
919+
# digest prefixes, they'll take the entire thing as a challenge and
920+
# respond to it with a raw HMAC-MD5.
921+
connection.send_bytes(_CHALLENGE + message)
740922
response = connection.recv_bytes(256) # reject large message
741-
if response == digest:
742-
connection.send_bytes(WELCOME)
923+
try:
924+
_verify_challenge(authkey, message, response)
925+
except AuthenticationError:
926+
connection.send_bytes(_FAILURE)
927+
raise
743928
else:
744-
connection.send_bytes(FAILURE)
745-
raise AuthenticationError('digest received was wrong')
929+
connection.send_bytes(_WELCOME)
746930

747-
def answer_challenge(connection, authkey):
748-
import hmac
931+
932+
def answer_challenge(connection, authkey: bytes):
749933
if not isinstance(authkey, bytes):
750934
raise ValueError(
751935
"Authkey must be bytes, not {0!s}".format(type(authkey)))
752936
message = connection.recv_bytes(256) # reject large message
753-
assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message
754-
message = message[len(CHALLENGE):]
755-
digest = hmac.new(authkey, message, 'md5').digest()
937+
if not message.startswith(_CHALLENGE):
938+
raise AuthenticationError(
939+
f'Protocol error, expected challenge: {message=}')
940+
message = message[len(_CHALLENGE):]
941+
if len(message) < _MD5ONLY_MESSAGE_LENGTH:
942+
raise AuthenticationError('challenge too short: {len(message)} bytes')
943+
digest = _create_response(authkey, message)
756944
connection.send_bytes(digest)
757945
response = connection.recv_bytes(256) # reject large message
758-
if response != WELCOME:
946+
if response != _WELCOME:
759947
raise AuthenticationError('digest sent was rejected')
760948

761949
#

Lib/test/_test_multiprocessing.py

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import multiprocessing.managers
4848
import multiprocessing.pool
4949
import multiprocessing.queues
50+
from multiprocessing.connection import wait, AuthenticationError
5051

5152
from multiprocessing import util
5253

@@ -125,8 +126,6 @@ def _resource_unlink(name, rtype):
125126

126127
WIN32 = (sys.platform == "win32")
127128

128-
from multiprocessing.connection import wait
129-
130129
def wait_for_handle(handle, timeout):
131130
if timeout is not None and timeout < 0.0:
132131
timeout = None
@@ -2983,7 +2982,7 @@ def test_remote(self):
29832982
del queue
29842983

29852984

2986-
@hashlib_helper.requires_hashdigest('md5')
2985+
@hashlib_helper.requires_hashdigest('sha256')
29872986
class _TestManagerRestart(BaseTestCase):
29882987

29892988
@classmethod
@@ -3468,7 +3467,7 @@ def test_dont_merge(self):
34683467
#
34693468

34703469
@unittest.skipUnless(HAS_REDUCTION, "test needs multiprocessing.reduction")
3471-
@hashlib_helper.requires_hashdigest('md5')
3470+
@hashlib_helper.requires_hashdigest('sha256')
34723471
class _TestPicklingConnections(BaseTestCase):
34733472

34743473
ALLOWED_TYPES = ('processes',)
@@ -3771,7 +3770,7 @@ def test_copy(self):
37713770

37723771

37733772
@unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory")
3774-
@hashlib_helper.requires_hashdigest('md5')
3773+
@hashlib_helper.requires_hashdigest('sha256')
37753774
class _TestSharedMemory(BaseTestCase):
37763775

37773776
ALLOWED_TYPES = ('processes',)
@@ -4578,7 +4577,7 @@ def test_invalid_handles(self):
45784577

45794578

45804579

4581-
@hashlib_helper.requires_hashdigest('md5')
4580+
@hashlib_helper.requires_hashdigest('sha256')
45824581
class OtherTest(unittest.TestCase):
45834582
# TODO: add more tests for deliver/answer challenge.
45844583
def test_deliver_challenge_auth_failure(self):
@@ -4598,7 +4597,7 @@ def __init__(self):
45984597
def recv_bytes(self, size):
45994598
self.count += 1
46004599
if self.count == 1:
4601-
return multiprocessing.connection.CHALLENGE
4600+
return multiprocessing.connection._CHALLENGE
46024601
elif self.count == 2:
46034602
return b'something bogus'
46044603
return b''
@@ -4608,14 +4607,52 @@ def send_bytes(self, data):
46084607
multiprocessing.connection.answer_challenge,
46094608
_FakeConnection(), b'abc')
46104609

4610+
4611+
@hashlib_helper.requires_hashdigest('md5')
4612+
@hashlib_helper.requires_hashdigest('sha256')
4613+
class ChallengeResponseTest(unittest.TestCase):
4614+
authkey = b'supadupasecretkey'
4615+
4616+
def create_response(self, message):
4617+
return multiprocessing.connection._create_response(
4618+
self.authkey, message
4619+
)
4620+
4621+
def verify_challenge(self, message, response):
4622+
return multiprocessing.connection._verify_challenge(
4623+
self.authkey, message, response
4624+
)
4625+
4626+
def test_challengeresponse(self):
4627+
for algo in [None, "md5", "sha256"]:
4628+
with self.subTest(f"{algo=}"):
4629+
msg = b'is-twenty-bytes-long' # The length of a legacy message.
4630+
if algo:
4631+
prefix = b'{%s}' % algo.encode("ascii")
4632+
else:
4633+
prefix = b''
4634+
msg = prefix + msg
4635+
response = self.create_response(msg)
4636+
if not response.startswith(prefix):
4637+
self.fail(response)
4638+
self.verify_challenge(msg, response)
4639+
4640+
# TODO(gpshead): We need integration tests for handshakes between modern
4641+
# deliver_challenge() and verify_response() code and connections running a
4642+
# test-local copy of the legacy Python <=3.11 implementations.
4643+
4644+
# TODO(gpshead): properly annotate tests for requires_hashdigest rather than
4645+
# only running these on a platform supporting everything. otherwise logic
4646+
# issues preventing it from working on FIPS mode setups will be hidden.
4647+
46114648
#
46124649
# Test Manager.start()/Pool.__init__() initializer feature - see issue 5585
46134650
#
46144651

46154652
def initializer(ns):
46164653
ns.test += 1
46174654

4618-
@hashlib_helper.requires_hashdigest('md5')
4655+
@hashlib_helper.requires_hashdigest('sha256')
46194656
class TestInitializers(unittest.TestCase):
46204657
def setUp(self):
46214658
self.mgr = multiprocessing.Manager()
@@ -5478,7 +5515,7 @@ def is_alive(self):
54785515
any(process.is_alive() for process in forked_processes))
54795516

54805517

5481-
@hashlib_helper.requires_hashdigest('md5')
5518+
@hashlib_helper.requires_hashdigest('sha256')
54825519
class TestSyncManagerTypes(unittest.TestCase):
54835520
"""Test all the types which can be shared between a parent and a
54845521
child process by using a manager which acts as an intermediary
@@ -5905,7 +5942,7 @@ def install_tests_in_module_dict(remote_globs, start_method):
59055942
class Temp(base, Mixin, unittest.TestCase):
59065943
pass
59075944
if type_ == 'manager':
5908-
Temp = hashlib_helper.requires_hashdigest('md5')(Temp)
5945+
Temp = hashlib_helper.requires_hashdigest('sha256')(Temp)
59095946
Temp.__name__ = Temp.__qualname__ = newname
59105947
Temp.__module__ = __module__
59115948
remote_globs[newname] = Temp
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
:mod:`multiprocessing` now supports stronger HMAC algorithms for inter-process
2+
connection authentication rather than only HMAC-MD5.

0 commit comments

Comments
 (0)