5 changes: 5 additions & 0 deletions CONTRIBUTORS.md
@@ -42,6 +42,11 @@ Contributors (as ordered by Github)
 * **[christinabo](https://github.com/christinabo)**
 
   * Python 3 compatibility
 
+* **[la55u](https://github.com/la55u)**
+
+  * JS unpacker
+  * PremProxy migration
+
 
 **[More details](https://github.com/pgaref/HTTP_Request_Randomizer/contributors).**
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
-# HTTP Request Randomizer [![Build Status](https://travis-ci.org/pgaref/HTTP_Request_Randomizer.svg?branch=master)](https://travis-ci.org/pgaref/HTTP_Request_Randomizer) [![Coverage Status](https://coveralls.io/repos/github/pgaref/HTTP_Request_Randomizer/badge.svg?branch=master)](https://coveralls.io/github/pgaref/HTTP_Request_Randomizer?branch=master) [![Dependency Status](https://gemnasium.com/badges/github.com/pgaref/HTTP_Request_Randomizer.svg)](https://gemnasium.com/github.com/pgaref/HTTP_Request_Randomizer) [![PyPI version](https://badge.fury.io/py/http-request-randomizer.svg)](https://badge.fury.io/py/http-request-randomizer)
+# HTTP Request Randomizer [![Build Status](https://travis-ci.org/pgaref/HTTP_Request_Randomizer.svg?branch=master)](https://travis-ci.org/pgaref/HTTP_Request_Randomizer) [![Coverage Status](https://coveralls.io/repos/github/pgaref/HTTP_Request_Randomizer/badge.svg?branch=master)](https://coveralls.io/github/pgaref/HTTP_Request_Randomizer?branch=master) [![Requirements Status](https://requires.io/github/pgaref/HTTP_Request_Randomizer/requirements.svg?branch=la55u-master)](https://requires.io/github/pgaref/HTTP_Request_Randomizer/requirements/?branch=la55u-master) [![PyPI version](https://badge.fury.io/py/http-request-randomizer.svg)](https://badge.fury.io/py/http-request-randomizer)
 
 [Vietnamese version](README-vi.md)
 
4 changes: 2 additions & 2 deletions README.rst
@@ -181,7 +181,7 @@ This project is licensed under the terms of the MIT license.
    :target: https://travis-ci.org/pgaref/HTTP_Request_Randomizer
 .. |Coverage Status| image:: https://coveralls.io/repos/github/pgaref/HTTP_Request_Randomizer/badge.svg?branch=master
    :target: https://coveralls.io/github/pgaref/HTTP_Request_Randomizer?branch=master
-.. |Dependency Status| image:: https://gemnasium.com/badges/github.com/pgaref/HTTP_Request_Randomizer.svg
-   :target: https://gemnasium.com/github.com/pgaref/HTTP_Request_Randomizer
+.. |Dependency Status| image:: https://requires.io/github/pgaref/HTTP_Request_Randomizer/requirements.svg?branch=la55u-master
+   :target: https://requires.io/github/pgaref/HTTP_Request_Randomizer/requirements/?branch=la55u-master
 .. |PyPI version| image:: https://badge.fury.io/py/http-request-randomizer.svg
    :target: https://badge.fury.io/py/http-request-randomizer
14 changes: 9 additions & 5 deletions http_request_randomizer/requests/parsers/FreeProxyParser.py
@@ -4,7 +4,7 @@
 from bs4 import BeautifulSoup
 
 from http_request_randomizer.requests.parsers.UrlParser import UrlParser
-from http_request_randomizer.requests.proxy.ProxyObject import ProxyObject, AnonymityLevel
+from http_request_randomizer.requests.proxy.ProxyObject import ProxyObject, AnonymityLevel, Protocol
 
 logger = logging.getLogger(__name__)
 __author__ = 'pgaref'
@@ -30,7 +30,7 @@ def parse_proxyList(self):
             headings = [th.get_text() for th in table.find("tr").find_all("th")]
 
             datasets = []
-            for row in table.find_all("tr")[1:]:
+            for row in table.find_all("tr")[1:-1]:
                 dataset = zip(headings, (td.get_text() for td in row.find_all("td")))
                 if dataset:
                     datasets.append(dataset)
@@ -57,6 +57,7 @@ def create_proxy_object(self, dataset):
         port = None
         anonymity = AnonymityLevel.UNKNOWN
         country = None
+        protocols = []
         for field in dataset:
             if field[0] == 'IP Address':
                 # Make sure it is a Valid IP
@@ -71,8 +72,11 @@ def create_proxy_object(self, dataset):
                 anonymity = AnonymityLevel.get(field[1].strip())  # String strip()
             elif field[0] == 'Country':
                 country = field[1].strip()  # String strip()
-        return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country)
+            elif field[0] == 'Https':
+                if field[1].strip().lower() == 'yes': protocols.extend([Protocol.HTTP, Protocol.HTTPS])
+                elif field[1].strip().lower() == 'no': protocols.append(Protocol.HTTP)
+        return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country, protocols=protocols)
 
     def __str__(self):
-        return "FreeProxy Parser of '{0}' with required bandwidth: '{1}' KBs" \
-            .format(self.url, self.minimum_bandwidth_in_KBs)
+        return "{0} parser of '{1}' with required bandwidth: '{2}' KBs" \
+            .format(self.id, self.url, self.minimum_bandwidth_in_KBs)
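
The FreeProxy parser walks the table by zipping the header row against each data row. Below is a minimal sketch of that pattern on a hypothetical two-row table in the free-proxy-list.net layout (the live table also ends with a footer/filter row, which is presumably why the diff switches from `[1:]` to `[1:-1]`); the HTML and values are illustrative only:

```python
from bs4 import BeautifulSoup

# Hypothetical sample in the free-proxy-list.net table layout.
html = """
<table id="proxylisttable">
  <tr><th>IP Address</th><th>Port</th><th>Https</th></tr>
  <tr><td>10.0.0.1</td><td>8080</td><td>yes</td></tr>
  <tr><td>10.0.0.2</td><td>3128</td><td>no</td></tr>
</table>
"""
table = BeautifulSoup(html, "html.parser").find("table")
# The first tr holds the field names; zip pairs them with each row's cells.
headings = [th.get_text() for th in table.find("tr").find_all("th")]
for row in table.find_all("tr")[1:]:
    dataset = list(zip(headings, (td.get_text() for td in row.find_all("td"))))
    print(dataset)
# [('IP Address', '10.0.0.1'), ('Port', '8080'), ('Https', 'yes')]
# [('IP Address', '10.0.0.2'), ('Port', '3128'), ('Https', 'no')]
```

With this pairing, the new `Https` branch reduces to a string check per row: `yes` yields both `Protocol.HTTP` and `Protocol.HTTPS`, `no` yields `Protocol.HTTP` only.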
http_request_randomizer/requests/parsers/{SamairProxyParser.py → PremProxyParser.py}
@@ -3,17 +3,21 @@
 import requests
 from bs4 import BeautifulSoup
 
+from http_request_randomizer.requests.parsers.js.UnPacker import JsUnPacker
 from http_request_randomizer.requests.parsers.UrlParser import UrlParser
-from http_request_randomizer.requests.proxy.ProxyObject import ProxyObject, AnonymityLevel
+from http_request_randomizer.requests.proxy.ProxyObject import ProxyObject, AnonymityLevel, Protocol
 
 logger = logging.getLogger(__name__)
 __author__ = 'pgaref'
 
 
 # Samair Proxy now renamed to: premproxy.com
-class SamairProxyParser(UrlParser):
+class PremProxyParser(UrlParser):
     def __init__(self, id, web_url, timeout=None):
+        self.base_url = web_url
         web_url += "/list/"
+        # Ports decoded by the JS unpacker
+        self.js_unpacker = None
         UrlParser.__init__(self, id=id, web_url=web_url, timeout=timeout)
 
     def parse_proxyList(self):
@@ -23,6 +27,9 @@ def parse_proxyList(self):
             # Get the pageRange from the 'pagination' table
             page_set = self.get_pagination_set()
             logger.debug("Pages: {}".format(page_set))
+            # One JS unpacker per provider (not per page)
+            self.js_unpacker = self.init_js_unpacker()
+
             for page in page_set:
                 response = requests.get("{0}{1}".format(self.get_url(), page), timeout=self.timeout)
                 if not response.ok:
@@ -32,33 +39,22 @@ def parse_proxyList(self):
                     # Return proxies parsed so far
                     return curr_proxy_list
                 content = response.content
-                soup = BeautifulSoup(content, "html.parser")
-                # css provides the port number so we reverse it
-                # for href in soup.findAll('link'):
-                #     if '/styles/' in href.get('href'):
-                #         style = "http://www.samair.ru" + href.get('href')
-                #         break
-                # css = requests.get(style).content.split('\n')
-                # css.pop()
-                # ports = {}
-                # for l in css:
-                #     p = l.split(' ')
-                #     key = p[0].split(':')[0][1:]
-                #     value = p[1].split('\"')[1]
-                #     ports[key] = value
+                soup = BeautifulSoup(content, "html.parser", from_encoding="iso-8859-1")
 
                 table = soup.find("div", attrs={"id": "proxylist"})
                 # The first tr contains the field names.
                 headings = [th.get_text() for th in table.find("tr").find_all("th")]
-                for row in table.find_all("tr")[1:]:
+                # skip last 'Select All' row
+                for row in table.find_all("tr")[1:-1]:
                     td_row = row.find("td")
-                    # curr_proxy_list.append('http://' + row.text + ports[row['class'][0]])
-                    proxy_obj = self.create_proxy_object(row)
+                    portKey = td_row.find('span', attrs={'class': True}).get('class')[0]
+                    port = self.js_unpacker.get_port(portKey)
+                    proxy_obj = self.create_proxy_object(row, port)
                     # Make sure it is a Valid Proxy Address
-                    if proxy_obj is not None and UrlParser.valid_ip_port(td_row.text):
+                    if proxy_obj is not None and UrlParser.valid_ip(proxy_obj.ip) and UrlParser.valid_port(port):
                         curr_proxy_list.append(proxy_obj)
                     else:
-                        logger.debug("Proxy Invalid: {}".format(td_row.text))
+                        logger.debug("Proxy Invalid: {}".format(proxy_obj.to_str()))
             except AttributeError as e:
                 logger.error("Provider {0} failed with Attribute error: {1}".format(self.id, e))
             except KeyError as e:
@@ -87,7 +83,23 @@ def get_pagination_set(self):
             page_set.add("")
         return page_set
 
-    def create_proxy_object(self, row):
+    def init_js_unpacker(self):
+        response = requests.get(self.get_url(), timeout=self.timeout)
+        # Could not parse provider page - Let user know
+        if not response.ok:
+            logger.warning("Proxy Provider url failed: {}".format(self.get_url()))
+            return None
+        content = response.content
+        soup = BeautifulSoup(content, "html.parser")
+
+        # js file contains the values for the ports
+        for script in soup.findAll('script'):
+            if '/js/' in script.get('src'):
+                jsUrl = self.base_url + script.get('src')
+                return JsUnPacker(jsUrl)
+        return None
+
+    def create_proxy_object(self, row, port):
         for td_row in row.findAll("td"):
             if td_row.attrs['data-label'] == 'IP:port ':
                 text = td_row.text.strip()
@@ -96,13 +108,13 @@
                 if not UrlParser.valid_ip(ip):
                     logger.debug("IP with Invalid format: {}".format(ip))
                     return None
-                port = text.split(":")[1]
             elif td_row.attrs['data-label'] == 'Anonymity Type: ':
                 anonymity = AnonymityLevel.get(td_row.text.strip())
             elif td_row.attrs['data-label'] == 'Country: ':
                 country = td_row.text.strip()
-        return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country)
+        protocols = [Protocol.HTTP]
+        return ProxyObject(source=self.id, ip=ip, port=port, anonymity_level=anonymity, country=country, protocols=protocols)
 
     def __str__(self):
-        return "SemairProxy Parser of '{0}' with required bandwidth: '{1}' KBs" \
-            .format(self.url, self.minimum_bandwidth_in_KBs)
+        return "{0} parser of '{1}' with required bandwidth: '{2}' KBs" \
+            .format(self.id, self.url, self.minimum_bandwidth_in_KBs)
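
On premproxy.com the port never appears in the page text: each row carries a `<span>` whose CSS class is a key that the site's packed JS maps to a port number, which is why the parser needs the unpacker's port map before it can build a proxy address. A minimal sketch of that lookup, with a hypothetical row and a hand-written map standing in for what `JsUnPacker.get_ports()` might return:

```python
from bs4 import BeautifulSoup

# Hypothetical row in the premproxy.com layout: IP in the text,
# port hidden behind a CSS class decoded from the packed JS.
row_html = '<tr><td data-label="IP:port ">88.99.10.1<span class="x1y"></span></td></tr>'
port_map = {'x1y': '8080'}  # illustrative stand-in for JsUnPacker.get_ports()

td = BeautifulSoup(row_html, "html.parser").find("td")
port_key = td.find('span', attrs={'class': True}).get('class')[0]  # -> 'x1y'
print(port_map[port_key])  # -> 8080
```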
4 changes: 4 additions & 0 deletions http_request_randomizer/requests/parsers/UrlParser.py
@@ -76,3 +76,7 @@ def valid_ip_port(address):
         if not match:
             return False
         return True
+
+    @staticmethod
+    def valid_port(port):
+        return 1 <= int(port) <= 65535
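
A quick sanity check of the new helper (shown standalone here for illustration; note that `int()` raises `ValueError` on non-numeric input such as `"abc"`, so callers are expected to pass digit strings, as the parsers above do):

```python
def valid_port(port):
    # standalone mirror of UrlParser.valid_port
    return 1 <= int(port) <= 65535

assert valid_port("8080")
assert valid_port(65535)
assert not valid_port(0)
assert not valid_port("70000")
```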
41 changes: 41 additions & 0 deletions http_request_randomizer/requests/parsers/js/UnPacker.py
@@ -0,0 +1,41 @@
+import re
+import requests
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class JsUnPacker(object):
+    """
+    It takes the javascript file's url which contains the port numbers for
+    the encrypted strings. The file has to be unpacked to a readable form just like
+    http://matthewfl.com/unPacker.html does. Then we create a dictionary for
+    every key:port pair.
+    """
+    # TODO: it might not be necessary to unpack the js code
+
+    def __init__(self, js_file_url):
+        logger.info("JS UnPacker init path: {}".format(js_file_url))
+        r = requests.get(js_file_url)
+        encrypted = r.text.strip()
+        encrypted = '(' + encrypted.split('}(')[1][:-1]
+        unpacked = eval('self.unpack' + encrypted)  # string of the js code in unpacked form
+        matches = re.findall(r".*?\('\.([a-zA-Z0-9]{1,6})'\).*?\((\d+)\)", unpacked)
+        self.ports = dict((key, port) for key, port in matches)
+        logger.debug('portmap: ' + str(self.ports))
+
+    def baseN(self, num, b, numerals="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+        return ((num == 0) and numerals[0]) or (self.baseN(num // b, b, numerals).lstrip(numerals[0]) + numerals[num % b])
+
+    def unpack(self, p, a, c, k, e=None, d=None):
+        while c:
+            c -= 1
+            if k[c]:
+                p = re.sub("\\b" + self.baseN(c, a) + "\\b", k[c], p)
+        return p
+
+    def get_port(self, key):
+        return self.ports[key]
+
+    def get_ports(self):
+        return self.ports
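
The unpacker reverses the `eval(function(p,a,c,k,e,d){...})` packing popularized by Dean Edwards' packer: every base-N token in the payload `p` is substituted with its entry in the keyword table `k`, and the `('.key')...(port)` pairs are then scraped out of the result with the regex above. A toy demonstration calling `unpack` directly on a made-up payload (bypassing `__init__`, which would fetch a real premproxy.com script):

```python
from http_request_randomizer.requests.parsers.js.UnPacker import JsUnPacker

# Toy packed payload: "0" and "1" are base-62 indices into the keyword table k.
unpacker = JsUnPacker.__new__(JsUnPacker)  # skip __init__ (no HTTP request)
p = "$('.0').html(1)"
k = ["x1y", "8080"]
print(unpacker.unpack(p, 62, len(k), k))  # -> $('.x1y').html(8080)
```

On the unpacked string, `re.findall(r".*?\('\.([a-zA-Z0-9]{1,6})'\).*?\((\d+)\)", ...)` yields `[('x1y', '8080')]`, which becomes the `{'x1y': '8080'}` port map the parser consults per row.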
http_request_randomizer/requests/parsers/js/__init__.py
Empty file.
7 changes: 7 additions & 0 deletions http_request_randomizer/requests/proxy/ProxyObject.py
@@ -82,3 +82,10 @@ def get(cls, name):
             return cls(name)
         except ValueError:
             return cls.UNKNOWN
+
+class Protocol(Enum):
+    UNKNOWN = 0
+    HTTP = 1
+    HTTPS = 2
+    SOCS4 = 3
+    SOCS5 = 4
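
`Enum` members compare by identity, so the new `protocols` field supports a plain membership test, the same `protocol in p.protocols` filter `requestProxy.py` applies below. A small sketch with a stub enum and plain dicts standing in for `ProxyObject` instances:

```python
from enum import Enum

class Protocol(Enum):  # stub of the new enum, for illustration
    UNKNOWN = 0
    HTTP = 1
    HTTPS = 2

proxies = [{'ip': '10.0.0.1', 'protocols': [Protocol.HTTP, Protocol.HTTPS]},
           {'ip': '10.0.0.2', 'protocols': [Protocol.HTTP]}]
https_only = [p for p in proxies if Protocol.HTTPS in p['protocols']]
print([p['ip'] for p in https_only])  # -> ['10.0.0.1']
```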
27 changes: 17 additions & 10 deletions http_request_randomizer/requests/proxy/requestProxy.py
@@ -10,11 +10,12 @@
 from requests.exceptions import ConnectionError
 from requests.exceptions import ReadTimeout
 
+from http_request_randomizer.requests.proxy.ProxyObject import Protocol
 from http_request_randomizer.requests.errors.ProxyListException import ProxyListException
 from http_request_randomizer.requests.parsers.FreeProxyParser import FreeProxyParser
 from http_request_randomizer.requests.parsers.ProxyForEuParser import ProxyForEuParser
 from http_request_randomizer.requests.parsers.RebroWeeblyParser import RebroWeeblyParser
-from http_request_randomizer.requests.parsers.SamairProxyParser import SamairProxyParser
+from http_request_randomizer.requests.parsers.PremProxyParser import PremProxyParser
 from http_request_randomizer.requests.useragent.userAgent import UserAgentManager
 
 __author__ = 'pgaref'
@@ -29,7 +30,7 @@
 
 
 class RequestProxy:
-    def __init__(self, web_proxy_list=[], sustain=False, timeout=5):
+    def __init__(self, web_proxy_list=[], sustain=False, timeout=5, protocol=Protocol.HTTP):
         self.userAgent = UserAgentManager()
         self.logger = logging.getLogger()
         self.logger.addHandler(handler)
@@ -40,9 +41,9 @@ def __init__(self, web_proxy_list=[], sustain=False, timeout=5):
         #####
         parsers = list([])
         parsers.append(FreeProxyParser('FreeProxy', 'http://free-proxy-list.net', timeout=timeout))
-        parsers.append(ProxyForEuParser('ProxyForEU', 'http://proxyfor.eu/geo.php', 1.0, timeout=timeout))
-        parsers.append(RebroWeeblyParser('ReBro', 'http://rebro.weebly.com', timeout=timeout))
-        parsers.append(SamairProxyParser('Samair', 'https://premproxy.com', timeout=timeout))
+        #parsers.append(ProxyForEuParser('ProxyForEU', 'http://proxyfor.eu/geo.php', 1.0, timeout=timeout)) <--doesn't work anymore
+        #parsers.append(RebroWeeblyParser('ReBro', 'http://rebro.weebly.com', timeout=timeout)) <--doesn't work anymore
+        parsers.append(PremProxyParser('PremProxy', 'https://premproxy.com', timeout=timeout))
 
         self.logger.debug("=== Initialized Proxy Parsers ===")
         for i in range(len(parsers)):
@@ -52,11 +53,17 @@ def __init__(self, web_proxy_list=[], sustain=False, timeout=5):
         self.sustain = sustain
         self.parsers = parsers
         self.proxy_list = web_proxy_list
-        for i in range(len(parsers)):
+        for parser in parsers:
             try:
-                self.proxy_list += parsers[i].parse_proxyList()
+                size = len(self.proxy_list)
+                self.proxy_list += parser.parse_proxyList()
+                self.logger.debug('Added {} proxies from {}'.format(len(self.proxy_list)-size, parser.id))
             except ReadTimeout:
-                self.logger.warning("Proxy Parser: '{}' TimedOut!".format(parsers[i].url))
+                self.logger.warning("Proxy Parser: '{}' TimedOut!".format(parser.url))
+        self.logger.debug('Total proxies = '+str(len(self.proxy_list)))
+        # filtering the list of available proxies according to user preferences
+        self.proxy_list = [p for p in self.proxy_list if protocol in p.protocols]
+        self.logger.debug('Filtered proxies = '+str(len(self.proxy_list)))
         self.current_proxy = self.randomize_proxy()
 
     def set_logger_level(self, level):
@@ -101,8 +108,8 @@ def generate_proxied_request(self, url, method="GET", params={}, data={}, header
 
         self.logger.debug("Using headers: {0}".format(str(headers)))
         self.logger.debug("Using proxy: {0}".format(str(self.current_proxy)))
-        request = requests.request(method, url, proxies={"http": self.current_proxy.get_address()},
-                                   headers=headers, data=data, params=params, timeout=req_timeout)
+        request = requests.request(method, url, headers=headers, data=data, params=params, timeout=req_timeout,
+                                   proxies={"http": self.current_proxy.get_address(), "https": self.current_proxy.get_address()})
         # Avoid HTTP request errors
         if request.status_code == 409:
             raise ConnectionError("HTTP Response [409] - Possible Cloudflare DNS resolution error")
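
End to end, the new `protocol` argument is the only user-facing change: `RequestProxy` gathers proxies from all parsers, then keeps only those whose `protocols` list contains the requested protocol. A usage sketch, assuming the README's usage pattern where `generate_proxied_request` returns `None` on a failed attempt:

```python
from http_request_randomizer.requests.proxy.requestProxy import RequestProxy
from http_request_randomizer.requests.proxy.ProxyObject import Protocol

# Protocol.HTTP is the default; pass Protocol.HTTPS to keep only
# proxies that advertise HTTPS support after parsing.
req_proxy = RequestProxy(protocol=Protocol.HTTP)
request = req_proxy.generate_proxied_request('http://httpbin.org/ip')
if request is not None:
    print(request.text)
```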
4 changes: 2 additions & 2 deletions http_request_randomizer/requests/runners/proxyList.py
@@ -7,7 +7,7 @@
 from http_request_randomizer.requests.parsers.FreeProxyParser import FreeProxyParser
 from http_request_randomizer.requests.parsers.ProxyForEuParser import ProxyForEuParser
 from http_request_randomizer.requests.parsers.RebroWeeblyParser import RebroWeeblyParser
-from http_request_randomizer.requests.parsers.SamairProxyParser import SamairProxyParser
+from http_request_randomizer.requests.parsers.PremProxyParser import PremProxyParser
 
 __author__ = 'pgaref'
 
@@ -23,7 +23,7 @@ def __init__(self, timeout=1.0, bandwidth=10.0):
         # Each of the entries implements a specific URL Parser
         self.parsers = dict()
         self.parsers['rebro'] = RebroWeeblyParser('ReBro', 'http://rebro.weebly.com', timeout=timeout)
-        self.parsers['samair'] = SamairProxyParser('Samair', 'https://premproxy.com', timeout=timeout)
+        self.parsers['prem'] = PremProxyParser('Prem', 'https://premproxy.com', timeout=timeout)
         self.parsers['freeproxy'] = FreeProxyParser('FreeProxy', 'http://free-proxy-list.net', timeout=timeout)
         self.parsers['proxyforeu'] = ProxyForEuParser('ProxyForEU', 'http://proxyfor.eu/geo.php',
                                                       bandwidth=bandwidth, timeout=timeout)
2 changes: 1 addition & 1 deletion setup.py
@@ -47,7 +47,7 @@ def run_tests(self):
 
 setup(
     name='http_request_randomizer',
-    version='1.2.2',
+    version='1.2.3',
     url='http://pgaref.com/blog/python-proxy',
     license='MIT',
     author='Panagiotis Garefalakis',