diff --git a/emar.py b/emar.py
index e45852361333d..5f8db6316b65c 100755
--- a/emar.py
+++ b/emar.py
@@ -6,98 +6,19 @@
 """Archive helper script
 
-This script acts as a frontend replacement for `ar`. See emcc.
-This is needed because, unlike a traditional linker, emscripten can't handle
-archive with duplicate member names. This is because emscripten extracts
-archive to a temporary location and duplicate filenames will clobber each
-other in this case.
+This script is a simple wrapper around llvm-ar. It used to have special
+handling for duplicate basenames in order to allow the bitcode linking
+process to read such files; that is now handled by using tools/arfile.py
+to read archives.
 """
 
-# TODO(sbc): Implement `ar x` within emscripten, in python, to avoid this issue
-# and delete this file.
-
-from __future__ import print_function
-import hashlib
-import os
-import shutil
 import sys
 
-from tools.toolchain_profiler import ToolchainProfiler
 from tools import shared
-from tools.response_file import substitute_response_files, create_response_file
-
-if __name__ == '__main__':
-  ToolchainProfiler.record_process_start()
 
-#
-# Main run() function
-#
 def run():
-  args = substitute_response_files(sys.argv)
-  newargs = [shared.LLVM_AR] + args[1:]
-
-  to_delete = []
-
-  # The 3 argmuent form of ar doesn't involve other files. For example
-  # 'ar x libfoo.a'.
-  if len(newargs) > 3:
-    cmd = newargs[1]
-    if 'r' in cmd:
-      # We are adding files to the archive.
-      # Normally the output file is then arg 2, except in the case were the
-      # a or b modifiers are used in which case its arg 3.
-      if 'a' in cmd or 'b' in cmd:
-        out_arg_index = 3
-      else:
-        out_arg_index = 2
-
-      contents = set()
-      if os.path.exists(newargs[out_arg_index]):
-        cmd = [shared.LLVM_AR, 't', newargs[out_arg_index]]
-        output = shared.check_call(cmd, stdout=shared.PIPE).stdout
-        contents.update(output.split('\n'))
-
-      # Add a hash to colliding basename, to make them unique.
-      for j in range(out_arg_index + 1, len(newargs)):
-        orig_name = newargs[j]
-        full_name = os.path.abspath(orig_name)
-        dirname = os.path.dirname(full_name)
-        basename = os.path.basename(full_name)
-        if basename not in contents:
-          contents.add(basename)
-          continue
-        h = hashlib.md5(full_name.encode('utf-8')).hexdigest()[:8]
-        parts = basename.split('.')
-        parts[0] += '_' + h
-        newname = '.'.join(parts)
-        full_newname = os.path.join(dirname, newname)
-        assert not os.path.exists(full_newname)
-        try:
-          shutil.copyfile(orig_name, full_newname)
-          newargs[j] = full_newname
-          to_delete.append(full_newname)
-          contents.add(newname)
-        except:
-          # it is ok to fail here, we just don't get hashing
-          contents.add(basename)
-          pass
-
-  if shared.DEBUG:
-    print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)
-
-  response_filename = create_response_file(newargs[3:], shared.get_emscripten_temp_dir())
-  to_delete += [response_filename]
-  newargs = newargs[:3] + ['@' + response_filename]
-
-  if shared.DEBUG:
-    print('emar:', sys.argv, ' ==> ', newargs, file=sys.stderr)
-
-  try:
-    return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
-  finally:
-    for d in to_delete:
-      shared.try_delete(d)
+  newargs = [shared.LLVM_AR] + sys.argv[1:]
+  return shared.run_process(newargs, stdin=sys.stdin, check=False).returncode
 
 
 if __name__ == '__main__':
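
For reference, the surviving wrapper is now just a pass-through. A minimal standalone sketch of the equivalent behaviour (assuming `llvm-ar` is on PATH rather than resolved via `shared.LLVM_AR`):

```python
#!/usr/bin/env python
# Minimal pass-through sketch: hand every argument to llvm-ar unchanged
# and propagate its exit code (llvm-ar is assumed to be on PATH).
import subprocess
import sys

sys.exit(subprocess.call(['llvm-ar'] + sys.argv[1:]))
```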
diff --git a/tests/test_core.py b/tests/test_core.py
index 67eaf7d138e7b..33d08410e70bc 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -8,7 +8,6 @@
 import hashlib
 import json
 import os
-import random
 import re
 import shutil
 import sys
@@ -5164,11 +5163,11 @@ def test_iostream_and_determinism(self):
       return 0;
     }
     '''
-    num = 5
+    num = 3
 
     def test():
       print('(iteration)')
-      time.sleep(random.random() / (10 * num)) # add some timing nondeterminism here, not that we need it, but whatever
+      time.sleep(1.0)
       self.do_run(src, 'hello world\n77.\n')
       ret = open('src.cpp.o.js', 'rb').read()
       if self.get_setting('WASM') and not self.get_setting('WASM2JS'):
@@ -5176,14 +5175,13 @@ def test():
       return ret
 
     builds = [test() for i in range(num)]
-    print(list(map(len, builds)))
+    print([len(b) for b in builds])
     uniques = set(builds)
     if len(uniques) != 1:
-      i = 0
-      for unique in uniques:
+      for i, unique in enumerate(uniques):
         open('unique_' + str(i) + '.js', 'wb').write(unique)
-        i += 1
-      assert 0, 'builds must be deterministic, see unique_X.js'
+      # builds must be deterministic, see unique_N.js
+      self.assertEqual(len(uniques), 1)
 
   def test_stdvec(self):
     self.do_run_in_out_file_test('tests', 'core', 'test_stdvec')
diff --git a/tests/test_other.py b/tests/test_other.py
index 69304acf4d1db..9b5e879584136 100644
--- a/tests/test_other.py
+++ b/tests/test_other.py
@@ -1450,16 +1450,10 @@ def test_archive_duplicate_basenames(self):
     ''')
     run_process([PYTHON, EMCC, os.path.join('b', 'common.c'), '-c', '-o', os.path.join('b', 'common.o')])
 
-    try_delete('liba.a')
-    run_process([PYTHON, EMAR, 'rc', 'liba.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
-
-    # Verify that archive contains basenames with hashes to avoid duplication
-    text = run_process([PYTHON, EMAR, 't', 'liba.a'], stdout=PIPE).stdout
-    self.assertEqual(text.count('common.o'), 1)
-    self.assertContained('common_', text)
-    for line in text.split('\n'):
-      # should not have huge hash names
-      self.assertLess(len(line), 20, line)
+    try_delete('libdup.a')
+    run_process([PYTHON, EMAR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
+    text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
+    self.assertEqual(text.count('common.o'), 2)
 
     create_test_file('main.c', r'''
       void a(void);
@@ -1469,30 +1463,9 @@ def test_archive_duplicate_basenames(self):
         b();
       }
     ''')
-    err = run_process([PYTHON, EMCC, 'main.c', '-L.', '-la'], stderr=PIPE).stderr
-    self.assertNotIn('archive file contains duplicate entries', err)
+    run_process([PYTHON, EMCC, 'main.c', '-L.', '-ldup'])
     self.assertContained('a\nb...\n', run_js('a.out.js'))
 
-    # Using llvm-ar directly should cause duplicate basenames
-    try_delete('libdup.a')
-    run_process([LLVM_AR, 'rc', 'libdup.a', os.path.join('a', 'common.o'), os.path.join('b', 'common.o')])
-    text = run_process([PYTHON, EMAR, 't', 'libdup.a'], stdout=PIPE).stdout
-    assert text.count('common.o') == 2, text
-
-    # With fastcomp we don't support duplicate members so this should generate
-    # a warning. With the wasm backend (lld) this is fully supported.
-    cmd = [PYTHON, EMCC, 'main.c', '-L.', '-ldup']
-    if self.is_wasm_backend():
-      run_process(cmd)
-      self.assertContained('a\nb...\n', run_js('a.out.js'))
-    else:
-      err = self.expect_fail(cmd)
-      self.assertIn('libdup.a: archive file contains duplicate entries', err)
-      self.assertIn('error: undefined symbol: a', err)
-      # others are not duplicates - the hashing keeps them separate
-      self.assertEqual(err.count('duplicate: '), 1)
-      self.assertContained('a\nb...\n', run_js('a.out.js'))
-
   def test_export_from_archive(self):
     export_name = 'this_is_an_entry_point'
     full_export_name = '_' + export_name
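
The rewritten test leans on llvm-ar happily storing two members with the same basename. A quick standalone reproduction of what the test now asserts (hypothetical paths; assumes `emar` is on PATH and `a/common.o` and `b/common.o` already exist):

```python
# Hypothetical repro: build an archive from two objects that share a
# basename, then confirm that 'emar t' lists the name twice.
import subprocess

subprocess.check_call(['emar', 'rc', 'libdup.a', 'a/common.o', 'b/common.o'])
listing = subprocess.check_output(['emar', 't', 'libdup.a']).decode()
assert listing.count('common.o') == 2, listing
```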
diff --git a/tools/arfile.py b/tools/arfile.py
new file mode 100755
index 0000000000000..ce41e3e66f391
--- /dev/null
+++ b/tools/arfile.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python
+# Copyright 2019 The Emscripten Authors. All rights reserved.
+# Emscripten is available under two separate licenses, the MIT license and the
+# University of Illinois/NCSA Open Source License. Both these licenses can be
+# found in the LICENSE file.
+
+"""Utility functions for parsing 'ar' files.
+
+This is needed in emscripten because command line tools such as llvm-ar are not
+able to deal with archives containing many files with the same name. Despite
+this, linkers are expected to handle this case and emscripten needs to emulate
+linker behaviour when using the fastcomp backend.
+
+See https://en.wikipedia.org/wiki/Ar_(Unix)
+"""
+
+from __future__ import print_function
+
+import struct
+import os
+import sys
+
+MAGIC = b'!<arch>\n'
+builtin_open = open
+
+
+class ArError(Exception):
+  """Base exception."""
+  pass
+
+
+class ArInfo(object):
+  def __init__(self, name, offset, timestamp, owner, group, mode, size, data):
+    self.name = name
+    self.offset = offset
+    self.timestamp = timestamp
+    self.owner = owner
+    self.group = group
+    self.mode = mode
+    self.size = size
+    self.data = data
+
+
+class ArFile(object):
+  def __init__(self, filename):
+    self.filename = filename
+    self._file = builtin_open(filename, 'rb')
+    magic = self._file.read(len(MAGIC))
+    if MAGIC != magic:
+      raise ArError('not an ar file: ' + filename)
+    self.members = []
+    self.members_map = {}
+    self.offset_to_info = {}
+
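+  # Each member is preceded by a 60-byte ASCII header whose fields are, in
+  # order: name (16 bytes), timestamp (12), owner (6), group (6), mode (8)
+  # and size (10), followed by the 2-byte terminator b'\x60\n'. Member data
+  # is padded with b'\n' to the next even offset.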
+  def _read_member(self):
+    offset = self._file.tell()
+    name = self._file.read(16)
+    if len(name) == 0:
+      return None
+    name = name.strip()
+    timestamp = self._file.read(12).strip()
+    owner = self._file.read(6).strip()
+    group = self._file.read(6).strip()
+    mode = self._file.read(8).strip()
+    size = int(self._file.read(10))
+    ending = self._file.read(2)
+    if ending != b'\x60\n':
+      raise ArError('invalid ar header')
+    data = self._file.read(size)
+    if mode.strip():
+      mode = int(mode)
+    if owner.strip():
+      owner = int(owner)
+    if group.strip():
+      group = int(group)
+    if size % 2:
+      if self._file.read(1) != b'\n':
+        raise ArError('invalid ar header')
+
+    return ArInfo(name.decode('utf-8'), offset, timestamp, owner, group, mode, size, data)
+
+  def next(self):
+    while True:
+      # Keep reading entries until we find a non-special one
+      info = self._read_member()
+      if not info:
+        return None
+      if info.name == '//':
+        # Special file containing long filenames
+        self.name_data = info.data
+      elif info.name == '/':
+        # Special file containing symbol table
+        num_entries = struct.unpack('>I', info.data[:4])[0]
+        self.sym_offsets = struct.unpack('>%dI' % num_entries, info.data[4:4 + 4 * num_entries])
+        symbol_data = info.data[4 + 4 * num_entries:-1]
+        symbol_data = symbol_data.rstrip(b'\0')
+        if symbol_data:
+          self.symbols = symbol_data.split(b'\0')
+        else:
+          self.symbols = []
+        if len(self.symbols) != num_entries:
+          raise ArError('invalid symbol table')
+      else:
+        break
+
+    # A name of the form "/<offset>" refers into the "//" long-name section.
+    if info.name[0] == '/':
+      name_offset = int(info.name[1:])
+      if name_offset < 0 or name_offset >= len(self.name_data):
+        raise ArError('invalid extended filename section')
+      name_end = self.name_data.find(b'\n', name_offset)
+      info.name = self.name_data[name_offset:name_end].decode('utf-8')
+    # GNU ar terminates member names with a trailing '/'
+    info.name = info.name.rstrip('/')
+    self.members.append(info)
+    self.members_map[info.name] = info
+    self.offset_to_info[info.offset] = info
+    return info
+
+  def getsymbols(self):
+    return zip(self.symbols, self.sym_offsets)
+
+  def getmember(self, id):
+    """Polymorphic member accessor that takes either an index or a name."""
+    if isinstance(id, int):
+      return self.getmember_by_index(id)
+    return self.getmember_by_name(id)
+
+  def getmember_by_name(self, name):
+    self.getmembers()
+    return self.members_map[name]
+
+  def getmember_by_index(self, index):
+    self.getmembers()
+    return self.members[index]
+
+  def getmembers(self):
+    while self.next():
+      pass
+    return self.members
+
+  def list(self):
+    for m in self.getmembers():
+      sys.stdout.write(m.name + '\n')
+
+  def extractall(self, path="."):
+    names_written = set()
+    for m in self.getmembers():
+      filename = m.name
+      if filename in names_written:
+        basename = filename
+        count = 1
+        while filename in names_written:
+          filename = basename + '.' + str(count)
+          count += 1
+
+      names_written.add(filename)
+      full_name = os.path.join(path, filename)
+      with builtin_open(full_name, 'wb') as f:
+        f.write(m.data)
+
+    return sorted(list(names_written))
+
+  def close(self):
+    self._file.close()
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, type, value, traceback):
+    self.close()
+
+
+def open(filename):
+  return ArFile(filename)
+
+
+def is_arfile(filename):
+  """Return True if filename points to an ar archive that we
+  are able to handle, else return False.
+  """
+  try:
+    t = open(filename)
+    t.close()
+    return True
+  except ArError:
+    return False
+
+
+if __name__ == '__main__':
+  open(sys.argv[1]).list()
+  open(sys.argv[1]).extractall()
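
A short usage sketch of the new module (hypothetical archive path): extractall() keeps colliding basenames apart by suffixing later duplicates with '.1', '.2', and so on:

```python
# Sketch: extract an archive whose members collide on basename
# (hypothetical libdup.a); both copies of common.o survive.
import tempfile

from tools import arfile

tmpdir = tempfile.mkdtemp()
with arfile.open('libdup.a') as f:
  print(f.extractall(tmpdir))  # e.g. ['common.o', 'common.o.1']
```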
diff --git a/tools/shared.py b/tools/shared.py
index 666f8176ef1dc..d518e815557c6 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -6,7 +6,7 @@
 from __future__ import print_function
 
 from distutils.spawn import find_executable
-from subprocess import PIPE, STDOUT
+from subprocess import PIPE, STDOUT  # noqa
 import atexit
 import base64
 import difflib
@@ -32,6 +32,7 @@
 from .toolchain_profiler import ToolchainProfiler
 from .tempfiles import try_delete
+from . import arfile
 from . import jsrun, cache, tempfiles, colored_logger
 from . import response_file
 
@@ -1301,60 +1302,22 @@ def verify_settings():
 
 verify_settings()
 
-# llvm-ar appears to just use basenames inside archives. as a result, files with the same basename
-# will trample each other when we extract them. to help warn of such situations, we warn if there
-# are duplicate entries in the archive
-def warn_if_duplicate_entries(archive_contents, archive_filename):
-  if len(archive_contents) != len(set(archive_contents)):
-    logger.warning('%s: archive file contains duplicate entries. This is not supported by emscripten. Only the last member with a given name will be linked in which can result in undefined symbols. You should either rename your source files, or use `emar` to create you archives which works around this issue.' % archive_filename)
-    warned = set()
-    for i in range(len(archive_contents)):
-      curr = archive_contents[i]
-      if curr not in warned and curr in archive_contents[i + 1:]:
-        logger.warning('   duplicate: %s' % curr)
-        warned.add(curr)
-
-
-# This function creates a temporary directory specified by the 'dir' field in
-# the returned dictionary. Caller is responsible for cleaning up those files
-# after done.
+# This function extracts the archive contents into a freshly created temporary
+# directory (the third element of the returned tuple). The caller is
+# responsible for cleaning up that directory when done with the files.
 def extract_archive_contents(archive_file):
-  lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines()
-  # ignore empty lines
-  contents = [l for l in lines if len(l)]
-  if len(contents) == 0:
-    logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file)
-    return {
-      'returncode': 0,
-      'dir': None,
-      'files': []
-    }
-
-  # `ar` files can only contains filenames. Just to be sure, verify that each
-  # file has only as filename component and is not absolute
-  for f in contents:
-    assert not os.path.dirname(f)
-    assert not os.path.isabs(f)
-
-  warn_if_duplicate_entries(contents, archive_file)
-
   # create temp dir
   temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')
 
-  # extract file in temp dir
-  proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir)
-  abs_contents = [os.path.join(temp_dir, c) for c in contents]
-
-  # check that all files were created
-  missing_contents = [x for x in abs_contents if not os.path.exists(x)]
-  if missing_contents:
-    exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout))
+  try:
+    with arfile.open(archive_file) as f:
+      contents = f.extractall(temp_dir)
+  except arfile.ArError as e:
+    logger.error(str(e))
+    return archive_file, [], temp_dir, False
 
-  return {
-    'returncode': proc.returncode,
-    'dir': temp_dir,
-    'files': abs_contents
-  }
+  abs_contents = [os.path.join(temp_dir, c) for c in contents]
+  return archive_file, abs_contents, temp_dir, True
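
extract_archive_contents() now returns a flat (archive, files, tmpdir, success) tuple rather than a dict. A sketch of consuming it directly (hypothetical archive path; the caller owns the temporary directory):

```python
# Sketch of the new return contract (hypothetical libfoo.a). The caller
# owns tmpdir and must delete it once done with the extracted files.
from tools import shared
from tools.tempfiles import try_delete

name, files, tmpdir, success = shared.extract_archive_contents('libfoo.a')
try:
  if not success:
    raise RuntimeError('failed to extract archive: ' + name)
  for f in files:
    print(f)  # absolute paths of the extracted members
finally:
  try_delete(tmpdir)
```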
@@ -1757,20 +1720,18 @@ def read_link_inputs(files):
       pool = Building.get_multiprocessing_pool()
       object_names_in_archives = pool.map(extract_archive_contents, archive_names)
 
-      def clean_temporary_archive_contents_directory(directory):
+      def clean_temporary_directory(directory):
         def clean_at_exit():
           try_delete(directory)
         if directory:
           atexit.register(clean_at_exit)
 
-      for n in range(len(archive_names)):
-        if object_names_in_archives[n]['returncode'] != 0:
-          raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!')
-        Building.ar_contents[archive_names[n]] = object_names_in_archives[n]['files']
-        clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir'])
-
-      for o in object_names_in_archives:
-        for f in o['files']:
+      for name, files, tmpdir, success in object_names_in_archives:
+        if not success:
+          exit_with_error('failed to extract archive: ' + name)
+        Building.ar_contents[name] = files
+        clean_temporary_directory(tmpdir)
+        for f in files:
           if f not in Building.uninternal_nm_cache:
             object_names.append(f)
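
One part of the new arfile API that the linking code above does not exercise is the GNU symbol index. A sketch of mapping each indexed symbol back to its defining member (hypothetical libdup.a; assumes the archive carries a '/' symbol-table member):

```python
# Sketch: map each symbol in the archive's index to the member defining
# it (hypothetical libdup.a; assumes a '/' symbol-table member exists).
from tools import arfile

with arfile.open('libdup.a') as f:
  f.getmembers()  # parse all members, populating the symbol table
  for sym, offset in f.getsymbols():
    print(sym.decode('utf-8'), '->', f.offset_to_info[offset].name)
```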