Skip to content

Commit fc186bd

Browse files
authored
Revert "Implement ar-file parsing in python (#8681)" (#8744)
This reverts commit 1d198e4. Apparently this caused a failure on the mac builder: https://ci.chromium.org/p/emscripten-releases/builders/ci/mac/b8911649373078606944 ``` cache:INFO: - ok Traceback (most recent call last): File "/b/s/w/ir/k/install/emscripten/emcc.py", line 3391, in <module> sys.exit(run(sys.argv)) File "/b/s/w/ir/k/install/emscripten/emcc.py", line 1894, in run final = shared.Building.link(linker_inputs, DEFAULT_FINAL, force_archive_contents=force_archive_contents, just_calculate=just_calculate) File "/b/s/w/ir/k/install/emscripten/tools/shared.py", line 1940, in link Building.read_link_inputs([x for x in files if not x.startswith('-')]) File "/b/s/w/ir/k/install/emscripten/tools/shared.py", line 1721, in read_link_inputs object_names_in_archives = pool.map(extract_archive_contents, archive_names) File "/b/s/w/ir/cipd_bin_packages/lib/python2.7/multiprocessing/pool.py", line 253, in map return self.map_async(func, iterable, chunksize).get() File "/b/s/w/ir/cipd_bin_packages/lib/python2.7/multiprocessing/pool.py", line 572, in get raise self._value IOError: [Errno 2] No such file or directory: u'/b/s/w/ir/tmp/t/emscripten_temp_oIE5H7_archive_contents/#1/12' ```
1 parent 0f3fcfa commit fc186bd

File tree

5 files changed

+180
-224
lines changed

5 files changed

+180
-224
lines changed

emar.py

Lines changed: 84 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,98 @@
66

77
"""Archive helper script
88
9-
This script is a simple wrapper around llvm-ar. It used to have special
10-
handling for duplicate basenames in order to allow bitcode linking process to
11-
read such files. This is now handled by using tools/arfile.py to read archives.
9+
This script acts as a frontend replacement for `ar`. See emcc.
10+
This is needed because, unlike a traditional linker, emscripten can't handle
11+
archives with duplicate member names. This is because emscripten extracts
12+
archives to a temporary location and duplicate filenames will clobber each
13+
other in this case.
1214
"""
1315

16+
# TODO(sbc): Implement `ar x` within emscripten, in python, to avoid this issue
17+
# and delete this file.
18+
19+
from __future__ import print_function
20+
import hashlib
21+
import os
22+
import shutil
1423
import sys
1524

25+
from tools.toolchain_profiler import ToolchainProfiler
1626
from tools import shared
27+
from tools.response_file import substitute_response_files, create_response_file
28+
29+
if __name__ == '__main__':
30+
ToolchainProfiler.record_process_start()
1731

1832

33+
#
34+
# Main run() function
35+
#
1936
def run():
20-
newargs = [shared.LLVM_AR] + sys.argv[1:]
21-
return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
37+
args = substitute_response_files(sys.argv)
38+
newargs = [shared.LLVM_AR] + args[1:]
39+
40+
to_delete = []
41+
42+
# The 3 argument form of ar doesn't involve other files. For example
43+
# 'ar x libfoo.a'.
44+
if len(newargs) > 3:
45+
cmd = newargs[1]
46+
if 'r' in cmd:
47+
# We are adding files to the archive.
48+
# Normally the output file is then arg 2, except in the case where the
49+
# a or b modifiers are used in which case it's arg 3.
50+
if 'a' in cmd or 'b' in cmd:
51+
out_arg_index = 3
52+
else:
53+
out_arg_index = 2
54+
55+
contents = set()
56+
if os.path.exists(newargs[out_arg_index]):
57+
cmd = [shared.LLVM_AR, 't', newargs[out_arg_index]]
58+
output = shared.check_call(cmd, stdout=shared.PIPE).stdout
59+
contents.update(output.split('\n'))
60+
61+
# Add a hash to colliding basename, to make them unique.
62+
for j in range(out_arg_index + 1, len(newargs)):
63+
orig_name = newargs[j]
64+
full_name = os.path.abspath(orig_name)
65+
dirname = os.path.dirname(full_name)
66+
basename = os.path.basename(full_name)
67+
if basename not in contents:
68+
contents.add(basename)
69+
continue
70+
h = hashlib.md5(full_name.encode('utf-8')).hexdigest()[:8]
71+
parts = basename.split('.')
72+
parts[0] += '_' + h
73+
newname = '.'.join(parts)
74+
full_newname = os.path.join(dirname, newname)
75+
assert not os.path.exists(full_newname)
76+
try:
77+
shutil.copyfile(orig_name, full_newname)
78+
newargs[j] = full_newname
79+
to_delete.append(full_newname)
80+
contents.add(newname)
81+
except:
82+
# it is ok to fail here, we just don't get hashing
83+
contents.add(basename)
84+
pass
85+
86+
if shared.DEBUG:
87+
print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)
88+
89+
response_filename = create_response_file(newargs[3:], shared.get_emscripten_temp_dir())
90+
to_delete += [response_filename]
91+
newargs = newargs[:3] + ['@' + response_filename]
92+
93+
if shared.DEBUG:
94+
print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)
95+
96+
try:
97+
return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
98+
finally:
99+
for d in to_delete:
100+
shared.try_delete(d)
22101

23102

24103
if __name__ == '__main__':

tests/test_core.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import hashlib
99
import json
1010
import os
11+
import random
1112
import re
1213
import shutil
1314
import sys
@@ -5163,25 +5164,26 @@ def test_iostream_and_determinism(self):
51635164
return 0;
51645165
}
51655166
'''
5166-
num = 3
5167+
num = 5
51675168

51685169
def test():
51695170
print('(iteration)')
5170-
time.sleep(1.0)
5171+
time.sleep(random.random() / (10 * num)) # add some timing nondeterminism here, not that we need it, but whatever
51715172
self.do_run(src, 'hello world\n77.\n')
51725173
ret = open('src.cpp.o.js', 'rb').read()
51735174
if self.get_setting('WASM') and not self.get_setting('WASM2JS'):
51745175
ret += open('src.cpp.o.wasm', 'rb').read()
51755176
return ret
51765177

51775178
builds = [test() for i in range(num)]
5178-
print([len(b) for b in builds])
5179+
print(list(map(len, builds)))
51795180
uniques = set(builds)
51805181
if len(uniques) != 1:
5181-
for i, unique in enumerate(uniques):
5182+
i = 0
5183+
for unique in uniques:
51825184
open('unique_' + str(i) + '.js', 'wb').write(unique)
5183-
# builds must be deterministic, see unique_N.js
5184-
self.assertEqual(len(uniques), 1)
5185+
i += 1
5186+
assert 0, 'builds must be deterministic, see unique_X.js'
51855187

51865188
def test_stdvec(self):
51875189
self.do_run_in_out_file_test('tests', 'core', 'test_stdvec')

tests/test_other.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,10 +1450,16 @@ def test_archive_duplicate_basenames(self):
14501450
''')
14511451
run_process([PYTHON, EMCC, os.path.join('b', 'common.c'), '-c', '-o', os.path.join('b', 'common.o')])
14521452

1453-
try_delete('libdup.a')
1454-
run_process([PYTHON, EMAR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
1455-
text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
1456-
self.assertEqual(text.count('common.o'), 2)
1453+
try_delete('liba.a')
1454+
run_process([PYTHON, EMAR, 'rc', 'liba.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
1455+
1456+
# Verify that archive contains basenames with hashes to avoid duplication
1457+
text = run_process([PYTHON, EMAR, 't', 'liba.a'], stdout=PIPE).stdout
1458+
self.assertEqual(text.count('common.o'), 1)
1459+
self.assertContained('common_', text)
1460+
for line in text.split('\n'):
1461+
# should not have huge hash names
1462+
self.assertLess(len(line), 20, line)
14571463

14581464
create_test_file('main.c', r'''
14591465
void a(void);
@@ -1463,9 +1469,30 @@ def test_archive_duplicate_basenames(self):
14631469
b();
14641470
}
14651471
''')
1466-
run_process([PYTHON, EMCC, 'main.c', '-L.', '-ldup'])
1472+
err = run_process([PYTHON, EMCC, 'main.c', '-L.', '-la'], stderr=PIPE).stderr
1473+
self.assertNotIn('archive file contains duplicate entries', err)
14671474
self.assertContained('a\nb...\n', run_js('a.out.js'))
14681475

1476+
# Using llvm-ar directly should cause duplicate basenames
1477+
try_delete('libdup.a')
1478+
run_process([LLVM_AR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
1479+
text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
1480+
assert text.count('common.o') == 2, text
1481+
1482+
# With fastcomp we don't support duplicate members so this should generate
1483+
# a warning. With the wasm backend (lld) this is fully supported.
1484+
cmd = [PYTHON, EMCC, 'main.c', '-L.', '-ldup']
1485+
if self.is_wasm_backend():
1486+
run_process(cmd)
1487+
self.assertContained('a\nb...\n', run_js('a.out.js'))
1488+
else:
1489+
err = self.expect_fail(cmd)
1490+
self.assertIn('libdup.a: archive file contains duplicate entries', err)
1491+
self.assertIn('error: undefined symbol: a', err)
1492+
# others are not duplicates - the hashing keeps them separate
1493+
self.assertEqual(err.count('duplicate: '), 1)
1494+
self.assertContained('a\nb...\n', run_js('a.out.js'))
1495+
14691496
def test_export_from_archive(self):
14701497
export_name = 'this_is_an_entry_point'
14711498
full_export_name = '_' + export_name

tools/arfile.py

Lines changed: 0 additions & 191 deletions
This file was deleted.

0 commit comments

Comments
 (0)