Skip to content

Commit 322a914

Browse files
bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660)
Adding `max_num_fields` to `cgi.FieldStorage` to make DoS attacks harder by limiting the number of `MiniFieldStorage` objects created by `FieldStorage`. (cherry picked from commit 2091448) Co-authored-by: matthewbelisle-wf <[email protected]>
1 parent d85c272 commit 322a914

File tree

5 files changed

+102
-12
lines changed

5 files changed

+102
-12
lines changed

Lib/cgi.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,8 @@ class FieldStorage:
404404
"""
405405
def __init__(self, fp=None, headers=None, outerboundary=b'',
406406
environ=os.environ, keep_blank_values=0, strict_parsing=0,
407-
limit=None, encoding='utf-8', errors='replace'):
407+
limit=None, encoding='utf-8', errors='replace',
408+
max_num_fields=None):
408409
"""Constructor. Read multipart/* until last part.
409410
410411
Arguments, all optional:
@@ -444,10 +445,14 @@ def __init__(self, fp=None, headers=None, outerboundary=b'',
444445
for the page sending the form (content-type : meta http-equiv or
445446
header)
446447
448+
max_num_fields: int. If set, then __init__ raises a ValueError
449+
if there are more than max_num_fields fields read by parse_qsl().
450+
447451
"""
448452
method = 'GET'
449453
self.keep_blank_values = keep_blank_values
450454
self.strict_parsing = strict_parsing
455+
self.max_num_fields = max_num_fields
451456
if 'REQUEST_METHOD' in environ:
452457
method = environ['REQUEST_METHOD'].upper()
453458
self.qs_on_post = None
@@ -670,12 +675,11 @@ def read_urlencoded(self):
670675
qs = qs.decode(self.encoding, self.errors)
671676
if self.qs_on_post:
672677
qs += '&' + self.qs_on_post
673-
self.list = []
674678
query = urllib.parse.parse_qsl(
675679
qs, self.keep_blank_values, self.strict_parsing,
676-
encoding=self.encoding, errors=self.errors)
677-
for key, value in query:
678-
self.list.append(MiniFieldStorage(key, value))
680+
encoding=self.encoding, errors=self.errors,
681+
max_num_fields=self.max_num_fields)
682+
self.list = [MiniFieldStorage(key, value) for key, value in query]
679683
self.skip_lines()
680684

681685
FieldStorageClass = None
@@ -689,9 +693,9 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
689693
if self.qs_on_post:
690694
query = urllib.parse.parse_qsl(
691695
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
692-
encoding=self.encoding, errors=self.errors)
693-
for key, value in query:
694-
self.list.append(MiniFieldStorage(key, value))
696+
encoding=self.encoding, errors=self.errors,
697+
max_num_fields=self.max_num_fields)
698+
self.list.extend(MiniFieldStorage(key, value) for key, value in query)
695699

696700
klass = self.FieldStorageClass or self.__class__
697701
first_line = self.fp.readline() # bytes
@@ -725,11 +729,23 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
725729
if 'content-length' in headers:
726730
del headers['content-length']
727731

732+
# Propagate max_num_fields into the sub class appropriately
733+
sub_max_num_fields = self.max_num_fields
734+
if sub_max_num_fields is not None:
735+
sub_max_num_fields -= len(self.list)
736+
728737
part = klass(self.fp, headers, ib, environ, keep_blank_values,
729738
strict_parsing,self.limit-self.bytes_read,
730-
self.encoding, self.errors)
739+
self.encoding, self.errors, sub_max_num_fields)
740+
741+
max_num_fields = self.max_num_fields
742+
if max_num_fields is not None and part.list:
743+
max_num_fields -= len(part.list)
744+
731745
self.bytes_read += part.bytes_read
732746
self.list.append(part)
747+
if max_num_fields is not None and max_num_fields < len(self.list):
748+
raise ValueError('Max number of fields exceeded')
733749
if part.done or self.bytes_read >= self.length > 0:
734750
break
735751
self.skip_lines()

Lib/test/test_cgi.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,55 @@ def testQSAndUrlEncode(self):
373373
v = gen_result(data, environ)
374374
self.assertEqual(self._qs_result, v)
375375

376+
def test_max_num_fields(self):
377+
# For application/x-www-form-urlencoded
378+
data = '&'.join(['a=a']*11)
379+
environ = {
380+
'CONTENT_LENGTH': str(len(data)),
381+
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
382+
'REQUEST_METHOD': 'POST',
383+
}
384+
385+
with self.assertRaises(ValueError):
386+
cgi.FieldStorage(
387+
fp=BytesIO(data.encode()),
388+
environ=environ,
389+
max_num_fields=10,
390+
)
391+
392+
# For multipart/form-data
393+
data = """---123
394+
Content-Disposition: form-data; name="a"
395+
396+
a
397+
---123
398+
Content-Type: application/x-www-form-urlencoded
399+
400+
a=a&a=a
401+
---123--
402+
"""
403+
environ = {
404+
'CONTENT_LENGTH': str(len(data)),
405+
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
406+
'QUERY_STRING': 'a=a&a=a',
407+
'REQUEST_METHOD': 'POST',
408+
}
409+
410+
# 2 GET entities
411+
# 2 top level POST entities
412+
# 2 entities within the second POST entity
413+
with self.assertRaises(ValueError):
414+
cgi.FieldStorage(
415+
fp=BytesIO(data.encode()),
416+
environ=environ,
417+
max_num_fields=5,
418+
)
419+
cgi.FieldStorage(
420+
fp=BytesIO(data.encode()),
421+
environ=environ,
422+
max_num_fields=6,
423+
)
424+
376425
def testQSAndFormData(self):
377426
data = """---123
378427
Content-Disposition: form-data; name="key2"

Lib/test/test_urlparse.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,13 @@ def test_parse_qsl_encoding(self):
879879
errors="ignore")
880880
self.assertEqual(result, [('key', '\u0141-')])
881881

882+
def test_parse_qsl_max_num_fields(self):
883+
with self.assertRaises(ValueError):
884+
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
885+
with self.assertRaises(ValueError):
886+
urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
887+
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
888+
882889
def test_urlencode_sequences(self):
883890
# Other tests incidentally urlencode things; test non-covered cases:
884891
# Sequence and object values.

Lib/urllib/parse.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
624624

625625

626626
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
627-
encoding='utf-8', errors='replace'):
627+
encoding='utf-8', errors='replace', max_num_fields=None):
628628
"""Parse a query given as a string argument.
629629
630630
Arguments:
@@ -645,11 +645,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
645645
encoding and errors: specify how to decode percent-encoded sequences
646646
into Unicode characters, as accepted by the bytes.decode() method.
647647
648+
max_num_fields: int. If set, then raises a ValueError if there
649+
are more than max_num_fields fields read by parse_qsl().
650+
648651
Returns a dictionary.
649652
"""
650653
parsed_result = {}
651654
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
652-
encoding=encoding, errors=errors)
655+
encoding=encoding, errors=errors,
656+
max_num_fields=max_num_fields)
653657
for name, value in pairs:
654658
if name in parsed_result:
655659
parsed_result[name].append(value)
@@ -659,7 +663,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
659663

660664

661665
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
662-
encoding='utf-8', errors='replace'):
666+
encoding='utf-8', errors='replace', max_num_fields=None):
663667
"""Parse a query given as a string argument.
664668
665669
Arguments:
@@ -679,9 +683,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
679683
encoding and errors: specify how to decode percent-encoded sequences
680684
into Unicode characters, as accepted by the bytes.decode() method.
681685
686+
max_num_fields: int. If set, then raises a ValueError
687+
if there are more than max_num_fields fields read by parse_qsl().
688+
682689
Returns a list, as G-d intended.
683690
"""
684691
qs, _coerce_result = _coerce_args(qs)
692+
693+
# If max_num_fields is defined then check that the number of fields
694+
# does not exceed max_num_fields. This prevents a memory exhaustion DoS
695+
# attack via post bodies with many fields.
696+
if max_num_fields is not None:
697+
num_fields = 1 + qs.count('&') + qs.count(';')
698+
if max_num_fields < num_fields:
699+
raise ValueError('Max number of fields exceeded')
700+
685701
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
686702
r = []
687703
for name_value in pairs:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DoS attacks harder by
2+
limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.

0 commit comments

Comments
 (0)