Skip to content

Commit 8c99017

Browse files
Merge pull request #57 from pyupio/security/remove-intensive-regex
Removing index server validation
2 parents 3290bb5 + d87364f commit 8c99017

File tree

2 files changed

+8
-41
lines changed

2 files changed

+8
-41
lines changed

dparse/parser.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from configparser import ConfigParser, NoOptionError
99

1010

11-
from .regex import URL_REGEX, HASH_REGEX
11+
from .regex import HASH_REGEX
1212

1313
from .dependencies import DependencyFile, Dependency
1414
from packaging.requirements import Requirement as PackagingRequirement, InvalidRequirement
@@ -175,10 +175,11 @@ def parse_index_server(cls, line):
175175
:param line:
176176
:return:
177177
"""
178-
matches = URL_REGEX.findall(line)
179-
if matches:
180-
url = matches[0]
181-
return url if url.endswith("/") else url + "/"
178+
groups = re.split(pattern="[=\s]+", string=line.strip(), maxsplit=100)
179+
180+
if len(groups) >= 2:
181+
return groups[1] if groups[1].endswith("/") else groups[1] + "/"
182+
182183
return None
183184

184185
@classmethod
@@ -346,6 +347,7 @@ def parse(self):
346347
except (toml.TomlDecodeError, IndexError) as e:
347348
pass
348349

350+
349351
class PipfileLockParser(Parser):
350352

351353
def parse(self):

dparse/regex.py

Lines changed: 1 addition & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,4 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, unicode_literals
33

4-
import re
5-
# see https://gist.github.com/dperini/729294
6-
URL_REGEX = re.compile(
7-
# protocol identifier
8-
"(?:(?:https?|ftp)://)"
9-
# user:pass authentication
10-
"(?:\S+(?::\S*)?@)?"
11-
"(?:"
12-
# IP address exclusion
13-
# private & local networks
14-
"(?!(?:10|127)(?:\.\d{1,3}){3})"
15-
"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
16-
"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
17-
# IP address dotted notation octets
18-
# excludes loopback network 0.0.0.0
19-
# excludes reserved space >= 224.0.0.0
20-
# excludes network & broadcast addresses
21-
# (first & last IP address of each class)
22-
"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
23-
"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
24-
"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
25-
"|"
26-
# host name
27-
"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
28-
# domain name
29-
"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
30-
# TLD identifier
31-
"(?:\.(?:[a-z\u00a1-\uffff]{2,}))"
32-
")"
33-
# port number
34-
"(?::\d{2,5})?"
35-
# resource path
36-
"(?:/\S*)?",
37-
re.UNICODE)
38-
39-
HASH_REGEX = r"--hash[=| ][\w]+:[\w]+"
4+
HASH_REGEX = r"--hash[=| ]\w+:\w+"

0 commit comments

Comments
 (0)