Skip to content

Commit 33151c8

Browse files
committed
update permission scraping script
1 parent 9c95a4c commit 33151c8

File tree

2 files changed

+53
-44
lines changed

2 files changed

+53
-44
lines changed

mode/scripts/permissions.py

Lines changed: 52 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,84 @@
11
import sys, re
22

3-
from urllib2 import urlopen
4-
from BeautifulSoup import BeautifulSoup
3+
from urllib.request import urlopen
4+
from bs4 import BeautifulSoup
55

6-
def getSoup(url):
7-
print 'Opening', url, '...'
6+
def get_soup(url):
87
page = urlopen(url)
9-
soup = BeautifulSoup(page)
8+
soup = BeautifulSoup(page, features="lxml")
109
return soup
1110

12-
def parseAll():
13-
soup = getSoup("https://developer.android.com/reference/android/Manifest.permission.html")
14-
print ' parsing...'
11+
def parse_all(soup):
12+
print('Parsing all permissions...')
1513
table = soup.find('table', { 'id': 'constants', 'class' : 'responsive constants' })
16-
entries = table.findAll('tr')
17-
strList = ''
14+
entries = table.find_all('tr')
15+
str_list = ''
1816
for entry in entries:
1917
if not entry or not entry.attrs: continue
20-
if 'absent' in entry.attrs[0][1]: continue
2118
info = entry.find('td', {'width':'100%'})
2219
if info:
2320
name = info.find('code').find('a').contents[0]
2421
pieces = []
22+
deprecated = False
2523
for piece in info.find('p').contents:
2624
piece_str = re.sub('\s+', ' ', str(piece)).strip()
2725
if '<code>' in piece_str:
28-
piece_str = piece.find('a').contents[0].strip();
26+
piece_str = piece.find('a').contents[0].strip()
27+
if '<em>' in piece_str and 'This constant was deprecated' in piece_str:
28+
deprecated = True
2929
pieces += [piece_str]
30-
if name and pieces:
30+
if name and pieces and not deprecated:
3131
desc = ' '.join(pieces).strip().replace('"', '\\"')
32-
strList += (',' if strList else '') + '\n "' + name + '", "' + desc + '"'
33-
strList = 'static final String[] listing = {' + strList + '\n };\n'
34-
return strList
32+
str_list += (',' if str_list else '') + '\n "' + name + '", "' + desc + '"'
33+
str_list = 'static final String[] listing = {' + str_list + '\n };\n'
34+
return str_list
3535

36-
def replaceAll(source, strList):
37-
print ' replacing...'
36+
def replace_all(source, str_list):
37+
print('Replacing old permissions...')
3838
idx0 = source.find('static final String[] listing = {')
3939
idx1 = source[idx0:].find(' };')
40-
return source[:idx0] + strList + source[idx0+idx1+5:]
40+
return source[:idx0] + str_list + source[idx0+idx1+5:]
4141

42-
def parseDanger():
43-
soup = getSoup("https://developer.android.com/guide/topics/security/permissions.html")
44-
print ' parsing...'
45-
table = soup.find('table')
46-
entries = table.findAll('tr')
47-
strList = ''
48-
for entry in entries:
49-
items = entry.findAll('li')
42+
def parse_danger(soup):
43+
print('Parsing dangerous permissions...')
44+
entries = soup.find_all(lambda tag:tag.name == "div" and
45+
len(tag.attrs) == 1 and
46+
"data-version-added" in tag.attrs)
47+
str_list = ''
48+
for entry in entries:
49+
name = entry.find('h3').contents[0]
50+
items = entry.find_all('p')
5051
for item in items:
51-
name = item.find('code').find('a').contents[0]
52-
strList += (',' if strList else '') + '\n "' + name + '"'
53-
strList = 'static final String[] dangerous = {' + strList + '\n };\n'
54-
return strList
52+
text = item.getText().strip()
53+
if 'Protection level:' in text and 'dangerous' in text:
54+
str_list += (',' if str_list else '') + '\n "' + name + '"'
55+
str_list = 'static final String[] dangerous = {' + str_list + '\n };\n'
56+
return str_list
5557

56-
def replaceDanger(source, strList):
57-
print ' replacing...'
58+
def replace_danger(source, str_list):
59+
print('Replacing dangerous permissions...')
5860
idx0 = source.find('static final String[] dangerous = {')
5961
idx1 = source[idx0:].find(' };')
60-
return source[:idx0] + strList + source[idx0+idx1+5:]
62+
return source[:idx0] + str_list + source[idx0+idx1+5:]
63+
64+
java_file = '../src/processing/mode/android/Permissions.java'
65+
ref_url = 'https://developer.android.com/reference/android/Manifest.permission.html'
6166

62-
javaFile = '../src/processing/mode/android/Permissions.java'
63-
print 'Reading Permissions.java...'
64-
with open(javaFile, 'r') as f:
67+
print('Reading Android reference...')
68+
soup = get_soup(ref_url)
69+
70+
print('Reading Permissions.java...')
71+
with open(java_file, 'r') as f:
6572
source = f.read()
6673

67-
allList = parseAll()
68-
source = replaceAll(source, allList)
74+
all_list = parse_all(soup)
75+
source = replace_all(source, all_list)
6976

70-
dangerList = parseDanger()
71-
source = replaceDanger(source, dangerList)
77+
danger_list = parse_danger(soup)
78+
source = replace_danger(source, danger_list)
7279

73-
print 'Writing Permissions.java...'
74-
with open(javaFile, 'w') as f:
80+
print('Writing Permissions.java...')
81+
with open(java_file, 'w') as f:
7582
f.write(source)
76-
print 'Done.'
83+
84+
print('Done.')

mode/scripts/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
beautifulsoup4

0 commit comments

Comments
 (0)