Skip to content

Commit ab1d60c

Browse files
committed
Reworked to be more robust while keeping the idea.
The protocol modification idea remains, but we now take advantage of the message length as an indicator of legacy vs modern protocol version. No more regular expression usage. We now default to HMAC-SHA256, but do so in a way that will be compatible when communicating with older clients or older servers. No protocol transition period is needed. More unittests to verify these claims remain true are required.
1 parent 954018a commit ab1d60c

File tree

3 files changed

+144
-75
lines changed

3 files changed

+144
-75
lines changed

Lib/multiprocessing/connection.py

Lines changed: 115 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
import io
1313
import os
14-
import re
1514
import sys
1615
import socket
1716
import struct
@@ -723,11 +722,11 @@ def PipeClient(address):
723722
# Authentication stuff
724723
#
725724

726-
MESSAGE_LENGTH = 20
725+
MESSAGE_LENGTH = 40 # MUST be > 20
727726

728-
CHALLENGE = b'#CHALLENGE#'
729-
WELCOME = b'#WELCOME#'
730-
FAILURE = b'#FAILURE#'
727+
_CHALLENGE = b'#CHALLENGE#'
728+
_WELCOME = b'#WELCOME#'
729+
_FAILURE = b'#FAILURE#'
731730

732731
# multiprocessing.connection Authentication Handshake Protocol Description
733732
# (as documented for reference after reading the existing code)
@@ -751,7 +750,12 @@ def PipeClient(address):
751750
# ------------------------------ ---------------------------------------
752751
# 0. Open a connection on the pipe.
753752
# 1. Accept connection.
754-
# 2. New random 20 bytes -> MESSAGE
753+
# 2. Random 20+ bytes -> MESSAGE
754+
# Modern servers always send
755+
# more than 20 bytes and include
756+
# a {digest} prefix on it with
757+
# their preferred HMAC digest.
758+
# Legacy ones send ==20 bytes.
755759
# 3. send 4 byte length (net order)
756760
# prefix followed by:
757761
# b'#CHALLENGE#' + MESSAGE
@@ -764,14 +768,32 @@ def PipeClient(address):
764768
# 6. Assert that M1 starts with:
765769
# b'#CHALLENGE#'
766770
# 7. Strip that prefix from M1 into -> M2
767-
# 8. Compute HMAC-MD5 of AUTHKEY, M2 -> C_DIGEST
771+
# 7.1. Parse M2: if it is exactly 20 bytes in
772+
# length this indicates a legacy server
773+
# supporting only HMAC-MD5. Otherwise the
774+
# 7.2. preferred digest is looked up from an
775+
# expected "{digest}" prefix on M2. No prefix
776+
# or unsupported digest? <- AuthenticationError
777+
# 7.3. Put divined algorithm name in -> D_NAME
778+
# 8. Compute HMAC-D_NAME of AUTHKEY, M2 -> C_DIGEST
768779
# 9. Send 4 byte length prefix (net order)
769780
# followed by C_DIGEST bytes.
770-
# 10. Compute HMAC-MD5 of AUTHKEY,
771-
# MESSAGE into -> M_DIGEST.
772-
# 11. Receive 4 or 4+8 byte length
781+
# 10. Receive 4 or 4+8 byte length
773782
# prefix (#4 dance) -> SIZE.
774-
# 12. Receive min(SIZE, 256) -> C_D.
783+
# 11. Receive min(SIZE, 256) -> C_D.
784+
# 11.1. Parse C_D: legacy servers
785+
# accept it as is, "md5" -> D_NAME
786+
# 11.2. modern servers check the length
787+
# of C_D, IF it is 16 bytes?
788+
# 11.2.1. "md5" -> D_NAME
789+
# and skip to step 12.
790+
# 11.3. longer? expect and parse a "{digest}"
791+
# prefix into -> D_NAME.
792+
# Strip the prefix and store remaining
793+
# bytes in -> C_D.
794+
# 11.4. Don't like D_NAME? <- AuthenticationError
795+
# 12. Compute HMAC-D_NAME of AUTHKEY,
796+
# MESSAGE into -> M_DIGEST.
775797
# 13. Compare M_DIGEST == C_D:
776798
# 14a: Match? Send length prefix &
777799
# b'#WELCOME#'
@@ -797,97 +819,134 @@ def PipeClient(address):
797819
# opening challenge message as an indicator of protocol version may work.
798820

799821

800-
_mac_algo_re = re.compile(
801-
rb'^{(?P<digestmod>(md5|sha256|sha384|sha3_256|sha3_384))}'
802-
rb'(?P<payload>.*)$'
803-
)
822+
_ALLOWED_DIGESTS = frozenset(
823+
{b'md5', b'sha256', b'sha384', b'sha3_256', b'sha3_384'})
824+
_MAX_DIGEST_LEN = max(len(_) for _ in _ALLOWED_DIGESTS)
825+
826+
# Old hmac-md5 only server versions from Python <=3.11 sent a message of this
827+
# length. It happens to not match the length of any supported digest so we can
828+
# use a message of this length to indicate that we should work in backwards
829+
# compatible md5-only mode without a {digest_name} prefix on our response.
830+
_MD5ONLY_MESSAGE_LENGTH = 20
831+
_MD5_DIGEST_LEN = 16
832+
_LEGACY_LENGTHS = (_MD5ONLY_MESSAGE_LENGTH, _MD5_DIGEST_LEN)
833+
834+
835+
def _get_digest_name_and_payload(message: bytes) -> (str, bytes):
836+
"""Returns a digest name and the payload for a response hash.
837+
838+
If a legacy protocol is detected based on the message length
839+
or contents the digest name returned will be empty to indicate
840+
legacy mode where MD5 and no digest prefix should be sent.
841+
"""
842+
# modern message format: b"{digest}payload" longer than 20 bytes
843+
# legacy message format: 16 or 20 byte b"payload"
844+
if len(message) in _LEGACY_LENGTHS:
845+
# Either this was a legacy server challenge, or we're processing
846+
# a reply from a legacy client that sent an unprefixed 16-byte
847+
# HMAC-MD5 response. All messages using the modern protocol will
848+
# be longer than either of these lengths.
849+
return '', message
850+
if (message.startswith(b'{') and
851+
(curly := message.find(b'}', 1, _MAX_DIGEST_LEN+2)) > 0):
852+
digest = message[1:curly]
853+
if digest in _ALLOWED_DIGESTS:
854+
payload = message[curly+1:]
855+
return digest.decode('ascii'), payload
856+
raise AuthenticationError(
857+
'unsupported message length, missing digest prefix, '
858+
f'or unsupported digest: {message=}')
804859

805860

806861
def _create_response(authkey, message):
807862
"""Create a MAC based on authkey and message
808863
809864
The MAC algorithm defaults to HMAC-MD5, unless MD5 is not available or
810-
the message has a '{digestmod}' prefix. For legacy HMAC-MD5, the response
811-
is the raw MAC, otherwise the response is prefixed with '{digestmod}',
865+
the message has a '{digest_name}' prefix. For legacy HMAC-MD5, the response
866+
is the raw MAC, otherwise the response is prefixed with '{digest_name}',
812867
e.g. b'{sha256}abcdefg...'
813868
814-
Note: The MAC protects the entire message including the digestmod prefix.
869+
Note: The MAC protects the entire message including the digest_name prefix.
815870
"""
816871
import hmac
817-
# message: {digest}payload, the MAC protects header and payload
818-
mo = _mac_algo_re.match(message)
819-
if mo is not None:
820-
digestmod = mo.group('digestmod').decode('ascii')
821-
else:
822-
# old-style MD5 with fallback
823-
digestmod = None
824-
825-
if digestmod is None:
872+
digest_name = _get_digest_name_and_payload(message)[0]
873+
# The MAC protects the entire message: digest header and payload.
874+
if not digest_name:
875+
# Legacy server without a {digest} prefix on message.
876+
# Generate a legacy non-prefixed HMAC-MD5 reply.
826877
try:
827878
return hmac.new(authkey, message, 'md5').digest()
828879
except ValueError:
829-
# MD5 is not available, fall back to SHA2-256
830-
digestmod = 'sha256'
831-
prefix = b'{%s}' % digestmod.encode('ascii')
832-
return prefix + hmac.new(authkey, message, digestmod).digest()
880+
# HMAC-MD5 is not available (FIPS mode?), fall back to
881+
# HMAC-SHA2-256 modern protocol. The legacy server probably
882+
# doesn't support it and will reject us anyways. :shrug:
883+
digest_name = 'sha256'
884+
# Modern protocol, indicate the digest used in the reply.
885+
response = hmac.new(authkey, message, digest_name).digest()
886+
return b'{%s}%s' % (digest_name.encode('ascii'), response)
833887

834888

835889
def _verify_challenge(authkey, message, response):
836890
"""Verify MAC challenge
837891
838-
If our message did not include a digestmod prefix, the client is allowed
839-
to select a stronger digestmod (HMAC-MD5 legacy to HMAC-SHA2-256).
892+
If our message did not include a digest_name prefix, the client is allowed
893+
to select a stronger digest_name from _ALLOWED_DIGESTS.
840894
841895
In case our message is prefixed, a client cannot downgrade to a weaker
842896
algorithm, because the MAC is calculated over the entire message
843-
including the '{digestmod}' prefix.
897+
including the '{digest_name}' prefix.
844898
"""
845899
import hmac
846-
mo = _mac_algo_re.match(response)
847-
if mo is not None:
848-
# get digestmod from response.
849-
digestmod = mo.group('digestmod').decode('ascii')
850-
mac = mo.group('payload')
851-
else:
852-
digestmod = 'md5'
853-
mac = response
900+
response_digest, response_mac = _get_digest_name_and_payload(response)
901+
response_digest = response_digest or 'md5'
854902
try:
855-
expected = hmac.new(authkey, message, digestmod).digest()
903+
expected = hmac.new(authkey, message, response_digest).digest()
856904
except ValueError:
857-
raise AuthenticationError(f'unsupported digest {digestmod}')
858-
if not hmac.compare_digest(expected, mac):
905+
raise AuthenticationError(f'{response_digest=} unsupported')
906+
if len(expected) != len(response_mac):
907+
raise AuthenticationError(
908+
f'expected {response_digest!r} of length {len(expected)} '
909+
f'got {len(response_mac)}')
910+
if not hmac.compare_digest(expected, response_mac):
859911
raise AuthenticationError('digest received was wrong')
860-
return True
861912

862913

863-
def deliver_challenge(connection, authkey, digestmod=None):
914+
def deliver_challenge(connection, authkey: bytes, digest_name='sha256'):
864915
if not isinstance(authkey, bytes):
865916
raise ValueError(
866917
"Authkey must be bytes, not {0!s}".format(type(authkey)))
918+
assert MESSAGE_LENGTH > _MD5ONLY_MESSAGE_LENGTH, "protocol constraint"
867919
message = os.urandom(MESSAGE_LENGTH)
868-
if digestmod is not None:
869-
message = b'{%s}%s' % (digestmod.encode('ascii'), message)
870-
connection.send_bytes(CHALLENGE + message)
920+
message = b'{%s}%s' % (digest_name.encode('ascii'), message)
921+
# Even when sending a challenge to a legacy client that does not support
922+
# digest prefixes, they'll take the entire thing as a challenge and
923+
# respond to it with a raw HMAC-MD5.
924+
connection.send_bytes(_CHALLENGE + message)
871925
response = connection.recv_bytes(256) # reject large message
872926
try:
873927
_verify_challenge(authkey, message, response)
874928
except AuthenticationError:
875-
connection.send_bytes(FAILURE)
929+
connection.send_bytes(_FAILURE)
876930
raise
877931
else:
878-
connection.send_bytes(WELCOME)
932+
connection.send_bytes(_WELCOME)
933+
879934

880-
def answer_challenge(connection, authkey):
935+
def answer_challenge(connection, authkey: bytes):
881936
if not isinstance(authkey, bytes):
882937
raise ValueError(
883938
"Authkey must be bytes, not {0!s}".format(type(authkey)))
884939
message = connection.recv_bytes(256) # reject large message
885-
assert message[:len(CHALLENGE)] == CHALLENGE, 'message = %r' % message
886-
message = message[len(CHALLENGE):]
940+
if not message.startswith(_CHALLENGE):
941+
raise AuthenticationError(
942+
f'Protocol error, expected challenge: {message=}')
943+
message = message[len(_CHALLENGE):]
944+
if len(message) < _MD5ONLY_MESSAGE_LENGTH:
945+
raise AuthenticationError('challenge too short: {len(message)} bytes')
887946
digest = _create_response(authkey, message)
888947
connection.send_bytes(digest)
889948
response = connection.recv_bytes(256) # reject large message
890-
if response != WELCOME:
949+
if response != _WELCOME:
891950
raise AuthenticationError('digest sent was rejected')
892951

893952
#

Lib/test/_test_multiprocessing.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3041,7 +3041,7 @@ def test_remote(self):
30413041
del queue
30423042

30433043

3044-
@hashlib_helper.requires_hashdigest('md5')
3044+
@hashlib_helper.requires_hashdigest('sha256')
30453045
class _TestManagerRestart(BaseTestCase):
30463046

30473047
@classmethod
@@ -3530,7 +3530,7 @@ def test_dont_merge(self):
35303530
#
35313531

35323532
@unittest.skipUnless(HAS_REDUCTION, "test needs multiprocessing.reduction")
3533-
@hashlib_helper.requires_hashdigest('md5')
3533+
@hashlib_helper.requires_hashdigest('sha256')
35343534
class _TestPicklingConnections(BaseTestCase):
35353535

35363536
ALLOWED_TYPES = ('processes',)
@@ -3833,7 +3833,7 @@ def test_copy(self):
38333833

38343834

38353835
@unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory")
3836-
@hashlib_helper.requires_hashdigest('md5')
3836+
@hashlib_helper.requires_hashdigest('sha256')
38373837
class _TestSharedMemory(BaseTestCase):
38383838

38393839
ALLOWED_TYPES = ('processes',)
@@ -4635,7 +4635,7 @@ def test_invalid_handles(self):
46354635

46364636

46374637

4638-
@hashlib_helper.requires_hashdigest('md5')
4638+
@hashlib_helper.requires_hashdigest('sha256')
46394639
class OtherTest(unittest.TestCase):
46404640
# TODO: add more tests for deliver/answer challenge.
46414641
def test_deliver_challenge_auth_failure(self):
@@ -4655,7 +4655,7 @@ def __init__(self):
46554655
def recv_bytes(self, size):
46564656
self.count += 1
46574657
if self.count == 1:
4658-
return multiprocessing.connection.CHALLENGE
4658+
return multiprocessing.connection._CHALLENGE
46594659
elif self.count == 2:
46604660
return b'something bogus'
46614661
return b''
@@ -4683,16 +4683,25 @@ def verify_challenge(self, message, response):
46834683

46844684
def test_challengeresponse(self):
46854685
for algo in [None, "md5", "sha256"]:
4686-
msg = b'mymessage'
4687-
if algo is not None:
4688-
prefix = b'{%s}' % algo.encode("ascii")
4689-
else:
4690-
prefix = b''
4691-
msg = prefix + msg
4692-
response = self.create_response(msg)
4693-
if not response.startswith(prefix):
4694-
self.fail(response)
4695-
self.verify_challenge(msg, response)
4686+
with self.subTest(f"{algo=}"):
4687+
msg = b'is-twenty-bytes-long' # The length of a legacy message.
4688+
if algo:
4689+
prefix = b'{%s}' % algo.encode("ascii")
4690+
else:
4691+
prefix = b''
4692+
msg = prefix + msg
4693+
response = self.create_response(msg)
4694+
if not response.startswith(prefix):
4695+
self.fail(response)
4696+
self.verify_challenge(msg, response)
4697+
4698+
# TODO(gpshead): We need integration tests for handshakes between modern
4699+
# deliver_challenge() and verify_response() code and connections running a
4700+
# test-local copy of the legacy Python <=3.11 implementations.
4701+
4702+
# TODO(gpshead): properly annotate tests for requires_hashdigest rather than
4703+
# only running these on a platform supporting everything. otherwise logic
4704+
# issues preventing it from working on FIPS mode setups will be hidden.
46964705

46974706
#
46984707
# Test Manager.start()/Pool.__init__() initializer feature - see issue 5585
@@ -4701,7 +4710,7 @@ def test_challengeresponse(self):
47014710
def initializer(ns):
47024711
ns.test += 1
47034712

4704-
@hashlib_helper.requires_hashdigest('md5')
4713+
@hashlib_helper.requires_hashdigest('sha256')
47054714
class TestInitializers(unittest.TestCase):
47064715
def setUp(self):
47074716
self.mgr = multiprocessing.Manager()
@@ -5561,7 +5570,7 @@ def is_alive(self):
55615570
any(process.is_alive() for process in forked_processes))
55625571

55635572

5564-
@hashlib_helper.requires_hashdigest('md5')
5573+
@hashlib_helper.requires_hashdigest('sha256')
55655574
class TestSyncManagerTypes(unittest.TestCase):
55665575
"""Test all the types which can be shared between a parent and a
55675576
child process by using a manager which acts as an intermediary
@@ -5993,7 +6002,7 @@ def install_tests_in_module_dict(remote_globs, start_method):
59936002
class Temp(base, Mixin, unittest.TestCase):
59946003
pass
59956004
if type_ == 'manager':
5996-
Temp = hashlib_helper.requires_hashdigest('md5')(Temp)
6005+
Temp = hashlib_helper.requires_hashdigest('sha256')(Temp)
59976006
Temp.__name__ = Temp.__qualname__ = newname
59986007
Temp.__module__ = __module__
59996008
remote_globs[newname] = Temp
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
:mod:`multiprocessing` supports stronger HMAC algorithms
1+
:mod:`multiprocessing` now supports stronger HMAC algorithms for inter-process
2+
connection authentication rather than only HMAC-MD5.

0 commit comments

Comments
 (0)