Skip to content

Commit 2091448

Browse files
matthewbelisle-wfmiss-islington
authored andcommitted
bpo-34866: Adding max_num_fields to cgi.FieldStorage (GH-9660)
Adding `max_num_fields` to `cgi.FieldStorage` to make DOS attacks harder by limiting the number of `MiniFieldStorage` objects created by `FieldStorage`.
1 parent f081fd8 commit 2091448

File tree

5 files changed

+102
-12
lines changed

5 files changed

+102
-12
lines changed

Lib/cgi.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,8 @@ class FieldStorage:
311311
"""
312312
def __init__(self, fp=None, headers=None, outerboundary=b'',
313313
environ=os.environ, keep_blank_values=0, strict_parsing=0,
314-
limit=None, encoding='utf-8', errors='replace'):
314+
limit=None, encoding='utf-8', errors='replace',
315+
max_num_fields=None):
315316
"""Constructor. Read multipart/* until last part.
316317
317318
Arguments, all optional:
@@ -351,10 +352,14 @@ def __init__(self, fp=None, headers=None, outerboundary=b'',
351352
for the page sending the form (content-type : meta http-equiv or
352353
header)
353354
355+
max_num_fields: int. If set, then __init__ throws a ValueError
356+
if there are more than n fields read by parse_qsl().
357+
354358
"""
355359
method = 'GET'
356360
self.keep_blank_values = keep_blank_values
357361
self.strict_parsing = strict_parsing
362+
self.max_num_fields = max_num_fields
358363
if 'REQUEST_METHOD' in environ:
359364
method = environ['REQUEST_METHOD'].upper()
360365
self.qs_on_post = None
@@ -578,12 +583,11 @@ def read_urlencoded(self):
578583
qs = qs.decode(self.encoding, self.errors)
579584
if self.qs_on_post:
580585
qs += '&' + self.qs_on_post
581-
self.list = []
582586
query = urllib.parse.parse_qsl(
583587
qs, self.keep_blank_values, self.strict_parsing,
584-
encoding=self.encoding, errors=self.errors)
585-
for key, value in query:
586-
self.list.append(MiniFieldStorage(key, value))
588+
encoding=self.encoding, errors=self.errors,
589+
max_num_fields=self.max_num_fields)
590+
self.list = [MiniFieldStorage(key, value) for key, value in query]
587591
self.skip_lines()
588592

589593
FieldStorageClass = None
@@ -597,9 +601,9 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
597601
if self.qs_on_post:
598602
query = urllib.parse.parse_qsl(
599603
self.qs_on_post, self.keep_blank_values, self.strict_parsing,
600-
encoding=self.encoding, errors=self.errors)
601-
for key, value in query:
602-
self.list.append(MiniFieldStorage(key, value))
604+
encoding=self.encoding, errors=self.errors,
605+
max_num_fields=self.max_num_fields)
606+
self.list.extend(MiniFieldStorage(key, value) for key, value in query)
603607

604608
klass = self.FieldStorageClass or self.__class__
605609
first_line = self.fp.readline() # bytes
@@ -633,11 +637,23 @@ def read_multi(self, environ, keep_blank_values, strict_parsing):
633637
if 'content-length' in headers:
634638
del headers['content-length']
635639

640+
# Propagate max_num_fields into the sub class appropriately
641+
sub_max_num_fields = self.max_num_fields
642+
if sub_max_num_fields is not None:
643+
sub_max_num_fields -= len(self.list)
644+
636645
part = klass(self.fp, headers, ib, environ, keep_blank_values,
637646
strict_parsing,self.limit-self.bytes_read,
638-
self.encoding, self.errors)
647+
self.encoding, self.errors, sub_max_num_fields)
648+
649+
max_num_fields = self.max_num_fields
650+
if max_num_fields is not None and part.list:
651+
max_num_fields -= len(part.list)
652+
639653
self.bytes_read += part.bytes_read
640654
self.list.append(part)
655+
if max_num_fields is not None and max_num_fields < len(self.list):
656+
raise ValueError('Max number of fields exceeded')
641657
if part.done or self.bytes_read >= self.length > 0:
642658
break
643659
self.skip_lines()

Lib/test/test_cgi.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,55 @@ def testQSAndUrlEncode(self):
381381
v = gen_result(data, environ)
382382
self.assertEqual(self._qs_result, v)
383383

384+
def test_max_num_fields(self):
385+
# For application/x-www-form-urlencoded
386+
data = '&'.join(['a=a']*11)
387+
environ = {
388+
'CONTENT_LENGTH': str(len(data)),
389+
'CONTENT_TYPE': 'application/x-www-form-urlencoded',
390+
'REQUEST_METHOD': 'POST',
391+
}
392+
393+
with self.assertRaises(ValueError):
394+
cgi.FieldStorage(
395+
fp=BytesIO(data.encode()),
396+
environ=environ,
397+
max_num_fields=10,
398+
)
399+
400+
# For multipart/form-data
401+
data = """---123
402+
Content-Disposition: form-data; name="a"
403+
404+
a
405+
---123
406+
Content-Type: application/x-www-form-urlencoded
407+
408+
a=a&a=a
409+
---123--
410+
"""
411+
environ = {
412+
'CONTENT_LENGTH': str(len(data)),
413+
'CONTENT_TYPE': 'multipart/form-data; boundary=-123',
414+
'QUERY_STRING': 'a=a&a=a',
415+
'REQUEST_METHOD': 'POST',
416+
}
417+
418+
# 2 GET entities
419+
# 2 top level POST entities
420+
# 2 entities within the second POST entity
421+
with self.assertRaises(ValueError):
422+
cgi.FieldStorage(
423+
fp=BytesIO(data.encode()),
424+
environ=environ,
425+
max_num_fields=5,
426+
)
427+
cgi.FieldStorage(
428+
fp=BytesIO(data.encode()),
429+
environ=environ,
430+
max_num_fields=6,
431+
)
432+
384433
def testQSAndFormData(self):
385434
data = """---123
386435
Content-Disposition: form-data; name="key2"

Lib/test/test_urlparse.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -880,6 +880,13 @@ def test_parse_qsl_encoding(self):
880880
errors="ignore")
881881
self.assertEqual(result, [('key', '\u0141-')])
882882

883+
def test_parse_qsl_max_num_fields(self):
884+
with self.assertRaises(ValueError):
885+
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
886+
with self.assertRaises(ValueError):
887+
urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
888+
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
889+
883890
def test_urlencode_sequences(self):
884891
# Other tests incidentally urlencode things; test non-covered cases:
885892
# Sequence and object values.

Lib/urllib/parse.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
628628

629629

630630
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
631-
encoding='utf-8', errors='replace'):
631+
encoding='utf-8', errors='replace', max_num_fields=None):
632632
"""Parse a query given as a string argument.
633633
634634
Arguments:
@@ -649,11 +649,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
649649
encoding and errors: specify how to decode percent-encoded sequences
650650
into Unicode characters, as accepted by the bytes.decode() method.
651651
652+
max_num_fields: int. If set, then throws a ValueError if there
653+
are more than n fields read by parse_qsl().
654+
652655
Returns a dictionary.
653656
"""
654657
parsed_result = {}
655658
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
656-
encoding=encoding, errors=errors)
659+
encoding=encoding, errors=errors,
660+
max_num_fields=max_num_fields)
657661
for name, value in pairs:
658662
if name in parsed_result:
659663
parsed_result[name].append(value)
@@ -663,7 +667,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
663667

664668

665669
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
666-
encoding='utf-8', errors='replace'):
670+
encoding='utf-8', errors='replace', max_num_fields=None):
667671
"""Parse a query given as a string argument.
668672
669673
Arguments:
@@ -683,9 +687,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
683687
encoding and errors: specify how to decode percent-encoded sequences
684688
into Unicode characters, as accepted by the bytes.decode() method.
685689
690+
max_num_fields: int. If set, then throws a ValueError
691+
if there are more than n fields read by parse_qsl().
692+
686693
Returns a list, as G-d intended.
687694
"""
688695
qs, _coerce_result = _coerce_args(qs)
696+
697+
# If max_num_fields is defined then check that the number of fields
698+
# is less than max_num_fields. This prevents a memory exhaustion DOS
699+
# attack via post bodies with many fields.
700+
if max_num_fields is not None:
701+
num_fields = 1 + qs.count('&') + qs.count(';')
702+
if max_num_fields < num_fields:
703+
raise ValueError('Max number of fields exceeded')
704+
689705
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
690706
r = []
691707
for name_value in pairs:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by
2+
limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.

0 commit comments

Comments
 (0)