From d846e2b0c2b2870e68757897bc3b1bbc104ce43f Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Mon, 1 Oct 2018 16:12:56 -0500 Subject: [PATCH 1/6] Adding max_num_fields to cgi.FieldStorage --- Lib/cgi.py | 22 +++++---- Lib/test/test_cgi.py | 45 +++++++++++++++++++ Lib/test/test_urlparse.py | 7 +++ Lib/urllib/parse.py | 27 +++++++++-- .../2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst | 2 + 5 files changed, 92 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst diff --git a/Lib/cgi.py b/Lib/cgi.py index b655a057d4be48..5b3c3223d64346 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -311,7 +311,8 @@ class FieldStorage: """ def __init__(self, fp=None, headers=None, outerboundary=b'', environ=os.environ, keep_blank_values=0, strict_parsing=0, - limit=None, encoding='utf-8', errors='replace'): + limit=None, encoding='utf-8', errors='replace', + max_num_fields=None): """Constructor. Read multipart/* until last part. Arguments, all optional: @@ -351,10 +352,14 @@ def __init__(self, fp=None, headers=None, outerboundary=b'', for the page sending the form (content-type : meta http-equiv or header) + max_num_fields: Integer. If set, then __init__ throws an ValueError + if there are more than n fields read by parse_qsl(). + """ method = 'GET' self.keep_blank_values = keep_blank_values self.strict_parsing = strict_parsing + self.max_num_fields = max_num_fields if 'REQUEST_METHOD' in environ: method = environ['REQUEST_METHOD'].upper() self.qs_on_post = None @@ -578,12 +583,11 @@ def read_urlencoded(self): qs = qs.decode(self.encoding, self.errors) if self.qs_on_post: qs += '&' + self.qs_on_post - self.list = [] query = urllib.parse.parse_qsl( qs, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors) - for key, value in query: - self.list.append(MiniFieldStorage(key, value)) + encoding=self.encoding, errors=self.errors, + max_num_fields=self.max_num_fields) + self.list = [MiniFieldStorage(key, value) for key, value in query] self.skip_lines() FieldStorageClass = None @@ -597,9 +601,9 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): if self.qs_on_post: query = urllib.parse.parse_qsl( self.qs_on_post, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors) - for key, value in query: - self.list.append(MiniFieldStorage(key, value)) + encoding=self.encoding, errors=self.errors, + max_num_fields=self.max_num_fields) + self.list.extend(MiniFieldStorage(key, value) for key, value in query) klass = self.FieldStorageClass or self.__class__ first_line = self.fp.readline() # bytes @@ -638,6 +642,8 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): self.encoding, self.errors) self.bytes_read += part.bytes_read self.list.append(part) + if self.max_num_fields is not None and self.max_num_fields < len(self.list): + raise ValueError('Max number of fields exceeded') if part.done or self.bytes_read >= self.length > 0: break self.skip_lines() diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index b284a0d0980586..568e219fbcf25f 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -381,6 +381,51 @@ def testQSAndUrlEncode(self): v = gen_result(data, environ) self.assertEqual(self._qs_result, v) + def test_max_num_fields(self): + # For application/x-www-form-urlencoded + data = '&'.join(['a=a']*11) + environ = { + 'CONTENT_LENGTH': str(len(data)), + 'CONTENT_TYPE': 'application/x-www-form-urlencoded', + 'REQUEST_METHOD': 'POST', + } + + with self.assertRaises(ValueError): + form = cgi.FieldStorage( + fp=BytesIO(data.encode('ascii')), + environ=environ, + max_num_fields=10, + ) + + # For multipart/form-data + data = """---123 +Content-Disposition: form-data; name="a" + +a +---123 +Content-Disposition: form-data; name="a" + +a +---123 +Content-Disposition: form-data; name="a" + +a +---123-- +""" + environ = { + 'CONTENT_LENGTH': str(len(data)), + 'CONTENT_TYPE': 'multipart/form-data; boundary=-123', + 'QUERY_STRING': 'a=a&a=a', + 'REQUEST_METHOD': 'POST', + } + + with self.assertRaises(ValueError): + form = cgi.FieldStorage( + fp=BytesIO(data.encode('ascii')), + environ=environ, + max_num_fields=4, + ) + def testQSAndFormData(self): data = """---123 Content-Disposition: form-data; name="key2" diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index cd3eabb5606b22..6738863f8def08 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -880,6 +880,13 @@ def test_parse_qsl_encoding(self): errors="ignore") self.assertEqual(result, [('key', '\u0141-')]) + def test_parse_qsl_max_num_fields(self): + with self.assertRaises(ValueError): + urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10) + with self.assertRaises(ValueError): + urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10) + urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10) + def test_urlencode_sequences(self): # Other tests incidentally urlencode things; test non-covered cases: # Sequence and object values. diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f21b8eb49cca22..cd8b26035a5499 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -628,7 +628,7 @@ def unquote(string, encoding='utf-8', errors='replace'): def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): + encoding='utf-8', errors='replace', max_num_fields=None): """Parse a query given as a string argument. Arguments: @@ -649,11 +649,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. + max_num_fields: Integer. If set, then throws an ValueError if there + are more than n fields read by parse_qsl(). + Returns a dictionary. """ parsed_result = {} pairs = parse_qsl(qs, keep_blank_values, strict_parsing, - encoding=encoding, errors=errors) + encoding=encoding, errors=errors, + max_num_fields=max_num_fields) for name, value in pairs: if name in parsed_result: parsed_result[name].append(value) @@ -662,8 +666,13 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, return parsed_result +# Used for checking parse_qsl() with max_num_fields. Both & and ; are valid query +# string delimiters. +_QS_DELIMITER_RE = re.compile(r'[&;]') + + def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace'): + encoding='utf-8', errors='replace', max_num_fields=None): """Parse a query given as a string argument. Arguments: @@ -683,9 +692,21 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. + max_num_fields: Integer. If set, then throws an ValueError + if there are more than n fields read by parse_qsl(). + Returns a list, as G-d intended. """ qs, _coerce_result = _coerce_args(qs) + + # If max_num_fields is defined then check that the number of fields + # is less than max_num_fields. This prevents a memory exhaustion DOS + # attack via post bodies with many fields. + if max_num_fields is not None: + for num_fields, _ in enumerate(_QS_DELIMITER_RE.finditer(qs), 2): + if max_num_fields < num_fields: + raise ValueError('Max number of fields exceeded') + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] for name_value in pairs: diff --git a/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst new file mode 100644 index 00000000000000..c8f6b34ee94526 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst @@ -0,0 +1,2 @@ +Adding max_num_fields to cgi.FieldStorage to make DOS attacks harder by +limiting the number of MiniFieldStorage objects created by FieldStorage. From 21e5e488a499ae10003e45f338e2cbc0e6929ffd Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Wed, 3 Oct 2018 16:01:27 -0500 Subject: [PATCH 2/6] Propagating max_num_fields to FieldStorage subclass --- Lib/cgi.py | 14 ++++++++++++-- Lib/test/test_cgi.py | 26 +++++++++++++++----------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/Lib/cgi.py b/Lib/cgi.py index 5b3c3223d64346..c74bc7ed8e1e22 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -637,12 +637,22 @@ def read_multi(self, environ, keep_blank_values, strict_parsing): if 'content-length' in headers: del headers['content-length'] + # Propagate max_num_fields into the sub class appropriately + sub_max_num_fields = self.max_num_fields + if sub_max_num_fields is not None: + sub_max_num_fields -= len(self.list) + part = klass(self.fp, headers, ib, environ, keep_blank_values, strict_parsing,self.limit-self.bytes_read, - self.encoding, self.errors) + self.encoding, self.errors, sub_max_num_fields) + + max_num_fields = self.max_num_fields + if max_num_fields is not None and part.list: + max_num_fields -= len(part.list) + self.bytes_read += part.bytes_read self.list.append(part) - if self.max_num_fields is not None and self.max_num_fields < len(self.list): + if max_num_fields is not None and max_num_fields < len(self.list): raise ValueError('Max number of fields exceeded') if part.done or self.bytes_read >= self.length > 0: break diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index 568e219fbcf25f..8ea9d6aee6c44d 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -391,8 +391,8 @@ def test_max_num_fields(self): } with self.assertRaises(ValueError): - form = cgi.FieldStorage( - fp=BytesIO(data.encode('ascii')), + cgi.FieldStorage( + fp=BytesIO(data.encode()), environ=environ, max_num_fields=10, ) @@ -403,13 +403,9 @@ def test_max_num_fields(self): a ---123 -Content-Disposition: form-data; name="a" - -a ----123 -Content-Disposition: form-data; name="a" +Content-Type: application/x-www-form-urlencoded -a +a=a&a=a ---123-- """ environ = { @@ -419,12 +415,20 @@ def test_max_num_fields(self): 'REQUEST_METHOD': 'POST', } + # 2 GET entities + # 2 top level POST entities + # 2 entities within the second POST entity with self.assertRaises(ValueError): - form = cgi.FieldStorage( - fp=BytesIO(data.encode('ascii')), + cgi.FieldStorage( + fp=BytesIO(data.encode()), environ=environ, - max_num_fields=4, + max_num_fields=5, ) + cgi.FieldStorage( + fp=BytesIO(data.encode()), + environ=environ, + max_num_fields=6, + ) def testQSAndFormData(self): data = """---123 From 27352e9fc243339db4ca327482d96b471da77b88 Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Thu, 4 Oct 2018 09:11:29 -0500 Subject: [PATCH 3/6] Fixing typos --- Lib/cgi.py | 2 +- Lib/urllib/parse.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/cgi.py b/Lib/cgi.py index c74bc7ed8e1e22..972ee0f51310cb 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -352,7 +352,7 @@ def __init__(self, fp=None, headers=None, outerboundary=b'', for the page sending the form (content-type : meta http-equiv or header) - max_num_fields: Integer. If set, then __init__ throws an ValueError + max_num_fields: Integer. If set, then __init__ throws a ValueError if there are more than n fields read by parse_qsl(). """ diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index cd8b26035a5499..0ecccce9733ad5 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -649,7 +649,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. - max_num_fields: Integer. If set, then throws an ValueError if there + max_num_fields: Integer. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). Returns a dictionary. @@ -692,7 +692,7 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. - max_num_fields: Integer. If set, then throws an ValueError + max_num_fields: Integer. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). Returns a list, as G-d intended. From ab0eb93f2160686283064ae08b4ce71d5bbad9c3 Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Fri, 5 Oct 2018 10:47:54 -0500 Subject: [PATCH 4/6] Fixing annotation --- Lib/cgi.py | 2 +- Lib/urllib/parse.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/cgi.py b/Lib/cgi.py index 972ee0f51310cb..adf4dcba19acb0 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -352,7 +352,7 @@ def __init__(self, fp=None, headers=None, outerboundary=b'', for the page sending the form (content-type : meta http-equiv or header) - max_num_fields: Integer. If set, then __init__ throws a ValueError + max_num_fields: int. If set, then __init__ throws a ValueError if there are more than n fields read by parse_qsl(). """ diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 0ecccce9733ad5..9afa4f9fac8ed0 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -649,7 +649,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. - max_num_fields: Integer. If set, then throws a ValueError if there + max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). Returns a dictionary. @@ -692,7 +692,7 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding and errors: specify how to decode percent-encoded sequences into Unicode characters, as accepted by the bytes.decode() method. - max_num_fields: Integer. If set, then throws a ValueError + max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). Returns a list, as G-d intended. From 1fa59e4efd44aa3bf0cdcf7b9bd00a7c1ec3fcef Mon Sep 17 00:00:00 2001 From: Matt Belisle Date: Thu, 18 Oct 2018 11:10:11 -0500 Subject: [PATCH 5/6] Using count() instead of finditer() for max_num_fields check --- Lib/urllib/parse.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 9afa4f9fac8ed0..dc2171144fc8ba 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -666,11 +666,6 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, return parsed_result -# Used for checking parse_qsl() with max_num_fields. Both & and ; are valid query -# string delimiters. -_QS_DELIMITER_RE = re.compile(r'[&;]') - - def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None): """Parse a query given as a string argument. @@ -703,9 +698,9 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, # is less than max_num_fields. This prevents a memory exhaustion DOS # attack via post bodies with many fields. if max_num_fields is not None: - for num_fields, _ in enumerate(_QS_DELIMITER_RE.finditer(qs), 2): - if max_num_fields < num_fields: - raise ValueError('Max number of fields exceeded') + num_fields = 1 + qs.count('&') + qs.count(';') + if max_num_fields < num_fields: + raise ValueError('Max number of fields exceeded') pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] From 20a77f2655d905d06c0e08d33e4e83236caca636 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Fri, 19 Oct 2018 19:34:44 +0900 Subject: [PATCH 6/6] Update 2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst --- .../next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst index c8f6b34ee94526..90c146ce834edf 100644 --- a/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst +++ b/Misc/NEWS.d/next/Library/2018-10-03-11-07-28.bpo-34866.ML6KpJ.rst @@ -1,2 +1,2 @@ -Adding max_num_fields to cgi.FieldStorage to make DOS attacks harder by -limiting the number of MiniFieldStorage objects created by FieldStorage. +Adding ``max_num_fields`` to ``cgi.FieldStorage`` to make DOS attacks harder by +limiting the number of ``MiniFieldStorage`` objects created by ``FieldStorage``.