Skip to content

Commit a66f279

Browse files
bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660)
Adding `max_num_fields` to `cgi.FieldStorage` to make DOS attacks harder by limiting the number of `MiniFieldStorage` objects created by `FieldStorage`. (cherry picked from commit 2091448) Co-authored-by: matthewbelisle-wf <[email protected]>
1 parent d6d35d0 commit a66f279

File tree

5 files changed

+102
-12
lines changed

5 files changed

+102
-12
lines changed

Lib/cgi.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,8 @@ class FieldStorage:
328328
"""
329329
def __init__(self, fp=None, headers=None, outerboundary=b'',
330330
environ=os.environ, keep_blank_values=0, strict_parsing=0,
331-
limit=None, encoding='utf-8', errors='replace'):
331+
limit=None, encoding='utf-8', errors='replace',
332+
max_num_fields=None):
332333
"""Constructor. Read multipart/* until last part.
333334
334335
Arguments, all optional:
@@ -368,10 +369,14 @@ def __init__(self, fp=None, headers=None, outerboundary=b'',
368369
for the page sending the form (content-type : meta http-equiv or
369370
header)
370371
372+
max_num_fields: int. If set, then __init__ raises a ValueError
373+
if more than max_num_fields fields are read by parse_qsl().
374+
371375
"""
372376
method = 'GET'
373377
self.keep_blank_values = keep_blank_values
374378
self.strict_parsing = strict_parsing
379+
self.max_num_fields = max_num_fields
375380
if 'REQUEST_METHOD' in environ:
376381
method = environ['REQUEST_METHOD'].upper()
377382
self.qs_on_post = None
@@ -595,12 +600,11 @@ def read_urlencoded(self):
595600
qs = qs.decode(self.encoding, self.errors)
596601
if self.qs_on_post:
597602
qs += '&' + self.qs_on_post
598-
self.list = []
599603
query = urllib.parse.parse_qsl(
600604
qs, self.keep_blank_values, self.strict_parsing,
601-
encoding=self.encoding, errors=self.errors)
602-
for key, value in query:
603-
self.list.append(MiniFieldStorage(key, value))
605+
encoding=self.encoding, errors=self.errors,
606+
max_num_fields=self.max_num_fields)
607+
self.list = [MiniFieldStorage(key, value) for key, value in query]
604608
self.skip_lines()
605609

606610
FieldStorageClass = None
@@ -614,9 +618,9 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
614618
if self.qs_on_post:
615619
query = urllib.parse.parse_qsl(
616620
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
617-
encoding=self.encoding, errors=self.errors)
618-
for key, value in query:
619-
self.list.append(MiniFieldStorage(key, value))
621+
encoding=self.encoding, errors=self.errors,
622+
max_num_fields=self.max_num_fields)
623+
self.list.extend(MiniFieldStorage(key, value) for key, value in query)
620624

621625
klass = self.FieldStorageClass or self.__class__
622626
first_line = self.fp.readline() # bytes
@@ -650,11 +654,23 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
650654
if 'content-length' in headers:
651655
del headers['content-length']
652656

657+
# Propagate the remaining max_num_fields budget into the nested part
658+
sub_max_num_fields = self.max_num_fields
659+
if sub_max_num_fields is not None:
660+
sub_max_num_fields -= len(self.list)
661+
653662
part = klass(self.fp, headers, ib, environ, keep_blank_values,
654663
strict_parsing,self.limit-self.bytes_read,
655-
self.encoding, self.errors)
664+
self.encoding, self.errors, sub_max_num_fields)
665+
666+
max_num_fields = self.max_num_fields
667+
if max_num_fields is not None and part.list:
668+
max_num_fields -= len(part.list)
669+
656670
self.bytes_read += part.bytes_read
657671
self.list.append(part)
672+
if max_num_fields is not None and max_num_fields < len(self.list):
673+
raise ValueError('Max number of fields exceeded')
658674
if part.done or self.bytes_read >= self.length > 0:
659675
break
660676
self.skip_lines()

Lib/test/test_cgi.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,55 @@ def testQSAndUrlEncode(self):
391391
v = gen_result(data, environ)
392392
self.assertEqual(self._qs_result, v)
393393

394+
def test_max_num_fields(self):
395+
# For application/x-www-form-urlencoded
396+
data = '&'.join(['a=a']*11)
397+
environ = {
398+
'CONTENT_LENGTH': str(len(data)),
399+
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
400+
'REQUEST_METHOD': 'POST',
401+
}
402+
403+
with self.assertRaises(ValueError):
404+
cgi.FieldStorage(
405+
fp=BytesIO(data.encode()),
406+
environ=environ,
407+
max_num_fields=10,
408+
)
409+
410+
# For multipart/form-data
411+
data = """---123
412+
Content-Disposition: form-data; name="a"
413+
414+
a
415+
---123
416+
Content-Type: application/x-www-form-urlencoded
417+
418+
a=a&a=a
419+
---123--
420+
"""
421+
environ = {
422+
'CONTENT_LENGTH': str(len(data)),
423+
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
424+
'QUERY_STRING': 'a=a&a=a',
425+
'REQUEST_METHOD': 'POST',
426+
}
427+
428+
# 2 GET entities
429+
# 2 top level POST entities
430+
# 2 entities within the second POST entity
431+
with self.assertRaises(ValueError):
432+
cgi.FieldStorage(
433+
fp=BytesIO(data.encode()),
434+
environ=environ,
435+
max_num_fields=5,
436+
)
437+
cgi.FieldStorage(
438+
fp=BytesIO(data.encode()),
439+
environ=environ,
440+
max_num_fields=6,
441+
)
442+
394443
def testQSAndFormData(self):
395444
data = """---123
396445
Content-Disposition: form-data; name="key2"

Lib/test/test_urlparse.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,13 @@ def test_parse_qsl_encoding(self):
879879
errors="ignore")
880880
self.assertEqual(result, [('key', '\u0141-')])
881881

882+
def test_parse_qsl_max_num_fields(self):
883+
with self.assertRaises(ValueError):
884+
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
885+
with self.assertRaises(ValueError):
886+
urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
887+
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
888+
882889
def test_urlencode_sequences(self):
883890
# Other tests incidentally urlencode things; test non-covered cases:
884891
# Sequence and object values.

Lib/urllib/parse.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -623,7 +623,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
623623

624624

625625
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
626-
encoding='utf-8', errors='replace'):
626+
encoding='utf-8', errors='replace', max_num_fields=None):
627627
"""Parse a query given as a string argument.
628628
629629
Arguments:
@@ -644,11 +644,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
644644
encoding and errors: specify how to decode percent-encoded sequences
645645
into Unicode characters, as accepted by the bytes.decode() method.
646646
647+
max_num_fields: int. If set, then raises a ValueError if more
648+
than max_num_fields fields are read by parse_qsl().
649+
647650
Returns a dictionary.
648651
"""
649652
parsed_result = {}
650653
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
651-
encoding=encoding, errors=errors)
654+
encoding=encoding, errors=errors,
655+
max_num_fields=max_num_fields)
652656
for name, value in pairs:
653657
if name in parsed_result:
654658
parsed_result[name].append(value)
@@ -658,7 +662,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
658662

659663

660664
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
661-
encoding='utf-8', errors='replace'):
665+
encoding='utf-8', errors='replace', max_num_fields=None):
662666
"""Parse a query given as a string argument.
663667
664668
Arguments:
@@ -678,9 +682,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
678682
encoding and errors: specify how to decode percent-encoded sequences
679683
into Unicode characters, as accepted by the bytes.decode() method.
680684
685+
max_num_fields: int. If set, then raises a ValueError
686+
if more than max_num_fields fields are read by parse_qsl().
687+
681688
Returns a list, as G-d intended.
682689
"""
683690
qs, _coerce_result = _coerce_args(qs)
691+
692+
# If max_num_fields is defined then check that the number of fields
693+
# does not exceed max_num_fields. This prevents a memory exhaustion DoS
694+
# attack via post bodies with many fields.
695+
if max_num_fields is not None:
696+
num_fields = 1 + qs.count('&') + qs.count(';')
697+
if max_num_fields < num_fields:
698+
raise ValueError('Max number of fields exceeded')
699+
684700
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
685701
r = []
686702
for name_value in pairs:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by
2+
limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.

0 commit comments

Comments
 (0)