Commit 70d3702

Merge pull request #4876 from juj/parallel_llvm_nm
parallel_llvm_nm
2 parents be6e66a + 8737d35

File tree

5 files changed: +179 -90 lines changed


tools/cache.py

Lines changed: 2 additions & 2 deletions

@@ -36,7 +36,7 @@ def try_remove_ending(thestring, ending):
 
   def acquire_cache_lock(self):
     if not self.EM_EXCLUSIVE_CACHE_ACCESS:
-      logging.debug('Cache: acquiring multiprocess file lock to Emscripten cache')
+      logging.debug('Cache: PID %s acquiring multiprocess file lock to Emscripten cache' % str(os.getpid()))
       try:
         self.filelock.acquire(60)
       except filelock.Timeout:
@@ -54,7 +54,7 @@ def release_cache_lock(self):
       if self.prev_EM_EXCLUSIVE_CACHE_ACCESS: os.environ['EM_EXCLUSIVE_CACHE_ACCESS'] = self.prev_EM_EXCLUSIVE_CACHE_ACCESS
       else: del os.environ['EM_EXCLUSIVE_CACHE_ACCESS']
       self.filelock.release()
-      logging.debug('Cache: released multiprocess file lock to Emscripten cache')
+      logging.debug('Cache: PID %s released multiprocess file lock to Emscripten cache' % str(os.getpid()))
 
   def ensure(self):
     self.acquire_cache_lock()
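
With a shared multiprocessing pool, several processes can now contend for the Emscripten cache at once, so the lock messages are tagged with the owning PID. A minimal standalone sketch of the locking pattern, assuming a pip-style filelock API like the one the toolchain bundles (the lock file path here is hypothetical, not part of the commit):

import logging
import os

import filelock

lock = filelock.FileLock('emscripten_cache.lock')  # hypothetical lock file path

logging.debug('Cache: PID %s acquiring multiprocess file lock' % str(os.getpid()))
try:
  lock.acquire(60)  # raise filelock.Timeout after 60s instead of hanging forever
  # ... read or write shared cache files here ...
finally:
  lock.release()  # no-op if the acquire timed out
  logging.debug('Cache: PID %s released multiprocess file lock' % str(os.getpid()))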

tools/duplicate_function_eliminator.py

Lines changed: 1 addition & 9 deletions

@@ -247,16 +247,8 @@ def write_chunk(chunk, i):
   if len(chunks) > 1 and cores >= 2:
     # We can parallelize
     if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
-    pool = multiprocessing.Pool(processes=cores)
+    pool = shared.Building.get_multiprocessing_pool()
     filenames = pool.map(run_on_chunk, commands, chunksize=1)
-    try:
-      # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated,
-      # and in other parts of the toolchain we also build up multiprocessing pools.
-      pool.terminate()
-      pool.join()
-    except Exception, e:
-      # On Windows we get occasional "Access is denied" errors when attempting to tear down the pool; ignore these.
-      logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e))
   else:
     # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
     if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
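
The deleted teardown block existed because each invocation built and destroyed its own pool; with one shared pool that lives for the whole run, both the construction cost and the Windows teardown errors disappear. A standalone sketch of the memoized-pool pattern this commit adopts (the names here are illustrative, not the toolchain's own):

import multiprocessing

_pool = None

def get_pool():
  # Build the pool once and reuse it; the repeated Pool() construction and
  # terminate()/join() teardown is what this commit eliminates.
  global _pool
  if _pool is None:
    _pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
  return _pool

def square(x):  # must be at top level so the pool can pickle it
  return x * x

if __name__ == '__main__':
  print(get_pool().map(square, range(8)))
  print(get_pool().map(square, range(8)))  # second call reuses the same workers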

tools/js_optimizer.py

Lines changed: 3 additions & 10 deletions

@@ -447,16 +447,9 @@ def write_chunk(chunk, i):
   if len(chunks) > 1 and cores >= 2:
     # We can parallelize
     if DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size/(1024*1024.))
-    pool = multiprocessing.Pool(processes=cores)
-    filenames = pool.map(run_on_chunk, commands, chunksize=1)
-    try:
-      # Shut down the pool, since otherwise processes are left alive and would only be lazily terminated,
-      # and in other parts of the toolchain we also build up multiprocessing pools.
-      pool.terminate()
-      pool.join()
-    except Exception, e:
-      # On Windows we get occasional "Access is denied" errors when attempting to tear down the pool; ignore these.
-      logging.debug('Attempting to tear down multiprocessing pool failed with an exception: ' + str(e))
+    with ToolchainProfiler.profile_block('optimizer_pool'):
+      pool = shared.Building.get_multiprocessing_pool()
+      filenames = pool.map(run_on_chunk, commands, chunksize=1)
   else:
     # We can't parallelize, but still break into chunks to avoid uglify/node memory issues
     if len(chunks) > 1 and DEBUG: print >> sys.stderr, 'splitting up js optimization into %d chunks' % (len(chunks))
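
ToolchainProfiler.profile_block wraps the pooled work in a with-block so the optimizer's pool time shows up in toolchain profiles. A minimal sketch of such a timing block (this implementation is hypothetical, not the toolchain's own):

import contextlib
import logging
import time

@contextlib.contextmanager
def profile_block(name):
  start = time.time()
  try:
    yield
  finally:
    logging.debug('block %s took %.2f seconds' % (name, time.time() - start))

with profile_block('optimizer_pool'):
  pass  # e.g. pool.map(run_on_chunk, commands, chunksize=1)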

tools/shared.py

Lines changed: 171 additions & 67 deletions

@@ -895,10 +895,10 @@ def get_vanilla_file():
       logging.debug('failed to use vanilla file, will re-check: ' + str(e))
   is_vanilla = check_vanilla()
   temp_cache = None
+  os.environ['EMCC_WASM_BACKEND'] = str(is_vanilla)
   if is_vanilla:
     logging.debug('check tells us to use wasm backend')
     LLVM_TARGET = WASM_TARGET
-    os.environ['EMCC_WASM_BACKEND'] = '1'
   else:
     logging.debug('check tells us to use asm.js backend')
     LLVM_TARGET = ASM_JS_TARGET
@@ -1168,13 +1168,101 @@ def __setattr__(self, attr, value):
 class Settings(object):
   __metaclass__ = Settings2
 
+# llvm-ar appears to just use basenames inside archives. as a result, files with the same basename
+# will trample each other when we extract them. to help warn of such situations, we warn if there
+# are duplicate entries in the archive
+def warn_if_duplicate_entries(archive_contents, archive_filename_hint=''):
+  if len(archive_contents) != len(set(archive_contents)):
+    logging.warning('loading from archive %s, which has duplicate entries (files with identical base names). this is dangerous as only the last will be taken into account, and you may see surprising undefined symbols later. you should rename source files to avoid this problem (or avoid .a archives, and just link bitcode together to form libraries for later linking)' % archive_filename_hint)
+    warned = set()
+    for i in range(len(archive_contents)):
+      curr = archive_contents[i]
+      if curr not in warned and curr in archive_contents[i+1:]:
+        logging.warning(' duplicate: %s' % curr)
+        warned.add(curr)
+
+def extract_archive_contents(f):
+  cwd = os.getcwd()
+  try:
+    temp_dir = os.path.join(tempfile.gettempdir(), f.replace('/', '_').replace('\\', '_').replace(':', '_') + '.archive_contents') # TODO: Make sure this is nice and sane
+    safe_ensure_dirs(temp_dir)
+    os.chdir(temp_dir)
+    contents = filter(lambda x: len(x) > 0, Popen([LLVM_AR, 't', f], stdout=PIPE).communicate()[0].split('\n'))
+    warn_if_duplicate_entries(contents, f)
+    if len(contents) == 0:
+      logging.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % f)
+      return {
+        'dir': temp_dir,
+        'files': []
+      }
+
+    # We are about to ask llvm-ar to extract all the files in the .a archive file, but
+    # it will silently fail if the directory for the file does not exist, so make all the necessary directories
+    for content in contents:
+      dirname = os.path.dirname(content)
+      if dirname:
+        safe_ensure_dirs(dirname)
+    Popen([LLVM_AR, 'xo', f], stdout=PIPE).communicate() # if absolute paths, files will appear there. otherwise, in this directory
+    contents = map(lambda content: os.path.join(temp_dir, content), contents)
+    contents = filter(os.path.exists, map(os.path.abspath, contents))
+    contents = filter(Building.is_bitcode, contents)
+    return {
+      'dir': temp_dir,
+      'files': contents
+    }
+  finally:
+    os.chdir(cwd)
+
+class ObjectFileInfo:
+  def __init__(self, defs, undefs, commons):
+    self.defs = defs
+    self.undefs = undefs
+    self.commons = commons
+
+# Due to a python pickling issue, the following two functions must be at top level, or multiprocessing pool spawn won't find them.
+
+def g_llvm_nm_uncached(filename):
+  return Building.llvm_nm_uncached(filename, stdout=PIPE, stderr=None)
+
+def g_multiprocessing_initializer(*args):
+  for item in args:
+    (key, value) = item.split('=')
+    os.environ[key] = value
+
 # Building
 
 class Building:
   COMPILER = CLANG
   LLVM_OPTS = False
   COMPILER_TEST_OPTS = [] # For use of the test runner
   JS_ENGINE_OVERRIDE = None # Used to pass the JS engine override from runner.py -> test_benchmark.py
+  multiprocessing_pool = None
+
+  # Multiprocessing pools are very slow to build up and tear down, and having several pools throughout
+  # the application has a problem of overallocating child processes. Therefore maintain a single
+  # centralized pool that is shared between all pooled task invocations.
+  @staticmethod
+  def get_multiprocessing_pool():
+    if not Building.multiprocessing_pool:
+      cores = int(os.environ.get('EMCC_CORES') or multiprocessing.cpu_count())
+
+      # If running with one core only, create a mock instance of a pool that does not
+      # actually spawn any new subprocesses. Very useful for internal debugging.
+      if cores == 1:
+        class FakeMultiprocessor:
+          def map(self, func, tasks):
+            results = []
+            for t in tasks:
+              results += [func(t)]
+            return results
+        Building.multiprocessing_pool = FakeMultiprocessor()
+      else:
+        child_env = [
+          'EMCC_WASM_BACKEND=' + os.environ['EMCC_WASM_BACKEND'], # so that pool children avoid calling check_vanilla() again and again
+          'EMCC_CORES=1' # pool children must not spawn their own children, which could cause a quadratic number of spawned processes
+        ]
+        Building.multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env)
+    return Building.multiprocessing_pool
 
   @staticmethod
   def get_building_env(native=False):
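
Two details above are easy to miss: pool workers re-import the module, so the mapped function and the initializer must be top-level names the pickler can find, and initargs is how each child inherits settings such as EMCC_WASM_BACKEND and EMCC_CORES=1 without recomputing them. A standalone sketch of that initializer pattern (illustrative names, not part of the commit):

import multiprocessing
import os

def initializer(*args):
  # Runs once in every child; applies 'KEY=value' strings to its environment.
  for item in args:
    key, value = item.split('=')
    os.environ[key] = value

def report(_):  # top level, so the pool can pickle it
  return os.environ.get('EMCC_CORES')

if __name__ == '__main__':
  pool = multiprocessing.Pool(processes=2, initializer=initializer, initargs=['EMCC_CORES=1'])
  print(pool.map(report, range(4)))  # every child reports '1'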
@@ -1419,6 +1507,58 @@ def open_make_err(i, mode='r'):
       os.chdir(old_dir)
     return generated_libs
 
+  @staticmethod
+  def make_paths_absolute(f):
+    if f.startswith('-'): # skip flags
+      return f
+    else:
+      return os.path.abspath(f)
+
+  # Runs llvm-nm in parallel for the given list of files, using the shared pool.
+  # The results are populated in Building.uninternal_nm_cache.
+  @staticmethod
+  def parallel_llvm_nm(files):
+    with ToolchainProfiler.profile_block('parallel_llvm_nm'):
+      pool = Building.get_multiprocessing_pool()
+      object_contents = pool.map(g_llvm_nm_uncached, files)
+
+      for i in range(len(files)):
+        Building.uninternal_nm_cache[files[i]] = object_contents[i]
+      return object_contents
+
+  @staticmethod
+  def read_link_inputs(files):
+    with ToolchainProfiler.profile_block('read_link_inputs'):
+      # Before performing the link, we need to look at each input file to determine which symbols
+      # each of them provides. Do this in multiple parallel processes.
+      archive_names = [] # .a files passed in on the command line to the link
+      object_names = [] # .o/.bc files passed in on the command line to the link
+      for f in files:
+        absolute_path_f = Building.make_paths_absolute(f)
+
+        if absolute_path_f not in Building.ar_contents and Building.is_ar(absolute_path_f):
+          archive_names.append(absolute_path_f)
+        elif absolute_path_f not in Building.uninternal_nm_cache and Building.is_bitcode(absolute_path_f):
+          object_names.append(absolute_path_f)
+
+      # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
+      pool = Building.get_multiprocessing_pool()
+      object_names_in_archives = pool.map(extract_archive_contents, archive_names)
+
+      for n in range(len(archive_names)):
+        Building.ar_contents[archive_names[n]] = object_names_in_archives[n]['files']
+
+      for o in object_names_in_archives:
+        for f in o['files']:
+          if f not in Building.uninternal_nm_cache:
+            object_names.append(f)
+
+      # Next, extract symbols from all object files (either standalone or inside archives we just extracted).
+      # The results are not used here directly, but they populate the llvm-nm cache structures.
+      Building.parallel_llvm_nm(object_names)
+
   @staticmethod
   def link(files, target, force_archive_contents=False, temp_files=None, just_calculate=False):
     if not temp_files:
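
read_link_inputs leans on two llvm-ar invocations per archive, performed inside extract_archive_contents above: 't' to list members and 'xo' to extract them (o preserves timestamps), run from a per-archive temp directory so member basenames cannot collide across archives. A direct sketch of those two steps (the binary name on PATH and the directory handling are illustrative):

import os
import subprocess

def list_members(archive):
  # 'llvm-ar t' prints one member name per line.
  out = subprocess.Popen(['llvm-ar', 't', archive], stdout=subprocess.PIPE,
                         universal_newlines=True).communicate()[0]
  return [m for m in out.split('\n') if m]

def extract_members(archive, temp_dir):
  # 'llvm-ar xo' extracts members into the current directory, so run it from a
  # per-archive temp dir to keep equal basenames from different archives apart.
  cwd = os.getcwd()
  os.chdir(temp_dir)
  try:
    subprocess.Popen(['llvm-ar', 'xo', archive], stdout=subprocess.PIPE).communicate()
  finally:
    os.chdir(cwd)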
@@ -1429,19 +1569,12 @@ def link(files, target, force_archive_contents=False, temp_files=None, just_calculate=False):
     # For a simple application, this would just be "main".
     unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS])
     resolved_symbols = set()
-    def make_paths_absolute(f):
-      if f.startswith('-'): # skip flags
-        return f
-      else:
-        return os.path.abspath(f)
     # Paths of already included object files from archives.
     added_contents = set()
-    # Map of archive name to list of extracted object file paths.
-    ar_contents = {}
     has_ar = False
     for f in files:
       if not f.startswith('-'):
-        has_ar = has_ar or Building.is_ar(make_paths_absolute(f))
+        has_ar = has_ar or Building.is_ar(Building.make_paths_absolute(f))
 
     # If we have only one archive or the force_archive_contents flag is set,
     # then we will add every object file we see, regardless of whether it
@@ -1467,51 +1600,14 @@ def consider_object(f, force_add=False):
         actual_files.append(f)
       return do_add
 
-    def get_archive_contents(f):
-      if f in ar_contents:
-        return ar_contents[f]
-
-      cwd = os.getcwd()
-      try:
-        temp_dir = temp_files.get_dir()
-        os.chdir(temp_dir)
-        contents = filter(lambda x: len(x) > 0, Popen([LLVM_AR, 't', f], stdout=PIPE).communicate()[0].split('\n'))
-        # llvm-ar appears to just use basenames inside archives. as a result, files with the same basename
-        # will trample each other when we extract them. to help warn of such situations, we warn if there
-        # are duplicate entries in the archive
-        if len(contents) != len(set(contents)):
-          logging.warning('loading from archive %s, which has duplicate entries (files with identical base names). this is dangerous as only the last will be taken into account, and you may see surprising undefined symbols later. you should rename source files to avoid this problem (or avoid .a archives, and just link bitcode together to form libraries for later linking)' % f)
-          warned = set()
-          for i in range(len(contents)):
-            curr = contents[i]
-            if curr not in warned and curr in contents[i+1:]:
-              logging.warning(' duplicate: %s' % curr)
-              warned.add(curr)
-        if len(contents) == 0:
-          logging.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % f)
-        else:
-          for content in contents: # ar will silently fail if the directory for the file does not exist, so make all the necessary directories
-            dirname = os.path.dirname(content)
-            if dirname:
-              safe_ensure_dirs(dirname)
-          Popen([LLVM_AR, 'x', f], stdout=PIPE).communicate() # if absolute paths, files will appear there. otherwise, in this directory
-          contents = map(lambda content: os.path.join(temp_dir, content), contents)
-          contents = filter(os.path.exists, map(os.path.abspath, contents))
-          contents = filter(Building.is_bitcode, contents)
-        ar_contents[f] = contents
-      finally:
-        os.chdir(cwd)
-
-      return contents
-
     # Traverse a single archive. The object files are repeatedly scanned for
     # newly satisfied symbols until no new symbols are found. Returns true if
     # any object files were added to the link.
     def consider_archive(f):
       added_any_objects = False
       loop_again = True
       logging.debug('considering archive %s' % (f))
-      contents = get_archive_contents(f)
+      contents = Building.ar_contents[f]
       while loop_again: # repeatedly traverse until we have everything we need
         loop_again = False
         for content in contents:
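
consider_archive's while-loop is the classic fixed-point traversal of a .a archive: pulling in one member can expose new undefined symbols that another member satisfies, so the members are rescanned until a full pass adds nothing. A compact sketch of that loop in isolation (the helper names are illustrative):

def traverse_archive(contents, added, satisfies_unresolved_symbol):
  # Repeat until a full pass over the members adds nothing new; 'added' grows
  # monotonically, so the loop always terminates.
  added_any = False
  loop_again = True
  while loop_again:
    loop_again = False
    for content in contents:
      if content in added:
        continue
      if satisfies_unresolved_symbol(content):
        added.add(content)
        loop_again = True
        added_any = True
  return added_any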
@@ -1524,9 +1620,11 @@ def consider_archive(f):
       logging.debug('done running loop of archive %s' % (f))
       return added_any_objects
 
+    Building.read_link_inputs(filter(lambda x: not x.startswith('-'), files))
+
     current_archive_group = None
     for f in files:
-      absolute_path_f = make_paths_absolute(f)
+      absolute_path_f = Building.make_paths_absolute(f)
       if f.startswith('-'):
         if f in ['--start-group', '-(']:
           assert current_archive_group is None, 'Nested --start-group, missing --end-group?'
@@ -1672,10 +1770,9 @@ def llvm_as(input_filename, output_filename=None):
 
   @staticmethod
   def parse_symbols(output, include_internal=False):
-    class ret:
-      defs = []
-      undefs = []
-      commons = []
+    defs = []
+    undefs = []
+    commons = []
     for line in output.split('\n'):
       if len(line) == 0: continue
       if ':' in line: continue # e.g. "filename.o:", saying which file it's from
@@ -1686,30 +1783,37 @@ class ret:
       if len(parts) == 2: # ignore lines with absolute offsets, these are not bitcode anyhow (e.g. |00000630 t d_source_name|)
         status, symbol = parts
         if status == 'U':
-          ret.undefs.append(symbol)
+          undefs.append(symbol)
         elif status == 'C':
-          ret.commons.append(symbol)
+          commons.append(symbol)
         elif (not include_internal and status == status.upper()) or \
              (include_internal and status in ['W', 't', 'T', 'd', 'D']): # FIXME: using WTD in the previous line fails due to llvm-nm behavior on OS X,
                                                                          # so for now we assume all uppercase are normally defined external symbols
-          ret.defs.append(symbol)
-    ret.defs = set(ret.defs)
-    ret.undefs = set(ret.undefs)
-    ret.commons = set(ret.commons)
-    return ret
+          defs.append(symbol)
+    return ObjectFileInfo(set(defs), set(undefs), set(commons))
 
-  nm_cache = {} # cache results of nm - it can be slow to run
+  internal_nm_cache = {} # cache results of nm - it can be slow to run
+  uninternal_nm_cache = {}
+  ar_contents = {} # Stores the object files contained in different archive files passed as input
 
   @staticmethod
-  def llvm_nm(filename, stdout=PIPE, stderr=None, include_internal=False):
-    if filename in Building.nm_cache:
-      #logging.debug('loading nm results for %s from cache' % filename)
-      return Building.nm_cache[filename]
-
+  def llvm_nm_uncached(filename, stdout=PIPE, stderr=None, include_internal=False):
     # LLVM binary ==> list of symbols
     output = Popen([LLVM_NM, filename], stdout=stdout, stderr=stderr).communicate()[0]
-    ret = Building.parse_symbols(output, include_internal)
-    Building.nm_cache[filename] = ret
+    return Building.parse_symbols(output, include_internal)
+
+  @staticmethod
+  def llvm_nm(filename, stdout=PIPE, stderr=None, include_internal=False):
+    if include_internal and filename in Building.internal_nm_cache:
+      return Building.internal_nm_cache[filename]
+    elif not include_internal and filename in Building.uninternal_nm_cache:
+      return Building.uninternal_nm_cache[filename]
+
+    ret = Building.llvm_nm_uncached(filename, stdout, stderr, include_internal)
+
+    if include_internal: Building.internal_nm_cache[filename] = ret
+    else: Building.uninternal_nm_cache[filename] = ret
+
     return ret
 
   @staticmethod
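
For reference, parse_symbols consumes llvm-nm's two-column lines, e.g. 'U printf' (undefined), 'C gbuf' (common), 'T main' (defined external); lines with a leading address column have three fields and are skipped. A self-contained rerun of that classification on sample output (a sketch mirroring the logic above, not the toolchain code itself):

def classify(output, include_internal=False):
  defs, undefs, commons = [], [], []
  for line in output.split('\n'):
    if len(line) == 0 or ':' in line:
      continue  # skip blanks and 'filename.o:' headers
    parts = [p for p in line.split(' ') if p]
    if len(parts) == 2:  # three-field lines carry an absolute offset; ignore them
      status, symbol = parts
      if status == 'U':
        undefs.append(symbol)
      elif status == 'C':
        commons.append(symbol)
      elif (not include_internal and status == status.upper()) or \
           (include_internal and status in ['W', 't', 'T', 'd', 'D']):
        defs.append(symbol)
  return set(defs), set(undefs), set(commons)

print(classify('foo.o:\n         U printf\nT main\nt helper'))
# -> ({'main'}, {'printf'}, set()); with include_internal=True, 'helper' lands in defs too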
