Skip to content

Commit 8571f7d

Browse files
authored
Refactor calls to md5 to go through a helper (#8250)
1 parent 7c5e69b commit 8571f7d

File tree

6 files changed: +36 −26 lines changed

mypy/build.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import contextlib
1414
import errno
1515
import gc
16-
import hashlib
1716
import json
1817
import os
1918
import pathlib
@@ -37,7 +36,7 @@
3736
from mypy.errors import Errors, CompileError, ErrorInfo, report_internal_error
3837
from mypy.util import (
3938
DecodeError, decode_python_encoding, is_sub_path, get_mypy_comments, module_prefix,
40-
read_py_file
39+
read_py_file, hash_digest,
4140
)
4241
if TYPE_CHECKING:
4342
from mypy.report import Reports # Avoid unconditional slow import
@@ -468,7 +467,7 @@ def take_module_snapshot(module: types.ModuleType) -> str:
468467
"""
469468
if hasattr(module, '__file__'):
470469
with open(module.__file__, 'rb') as f:
471-
digest = hashlib.md5(f.read()).hexdigest()
470+
digest = hash_digest(f.read())
472471
else:
473472
digest = 'unknown'
474473
ver = getattr(module, '__version__', 'none')
@@ -1262,9 +1261,9 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
12621261
# coarse-grained incremental rebuild, so we accept the cache
12631262
# metadata even if it doesn't match the source file.
12641263
#
1265-
# We still *do* the mtime/md5 checks, however, to enable
1264+
# We still *do* the mtime/hash checks, however, to enable
12661265
# fine-grained mode to take advantage of the mtime-updating
1267-
# optimization when mtimes differ but md5s match. There is
1266+
# optimization when mtimes differ but hashes match. There is
12681267
# essentially no extra time cost to computing the hash here, since
12691268
# it will be cached and will be needed for finding changed files
12701269
# later anyways.
@@ -1292,7 +1291,7 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
12921291

12931292
t0 = time.time()
12941293
try:
1295-
source_hash = manager.fscache.md5(path)
1294+
source_hash = manager.fscache.hash_digest(path)
12961295
except (OSError, UnicodeDecodeError, DecodeError):
12971296
return None
12981297
manager.add_stats(validate_hash_time=time.time() - t0)
@@ -1346,10 +1345,12 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
13461345

13471346

13481347
def compute_hash(text: str) -> str:
1349-
# We use md5 instead of the builtin hash(...) function because the output of hash(...)
1350-
# can differ between runs due to hash randomization (enabled by default in Python 3.3).
1351-
# See the note in https://docs.python.org/3/reference/datamodel.html#object.__hash__.
1352-
return hashlib.md5(text.encode('utf-8')).hexdigest()
1348+
# We use a crypto hash instead of the builtin hash(...) function
1349+
# because the output of hash(...) can differ between runs due to
1350+
# hash randomization (enabled by default in Python 3.3). See the
1351+
# note in
1352+
# https://docs.python.org/3/reference/datamodel.html#object.__hash__.
1353+
return hash_digest(text.encode('utf-8'))
13531354

13541355

13551356
def json_dumps(obj: Any, debug_cache: bool) -> str:
@@ -1982,7 +1983,7 @@ def parse_file(self) -> None:
19821983
path = manager.maybe_swap_for_shadow_path(self.path)
19831984
source = decode_python_encoding(manager.fscache.read(path),
19841985
manager.options.python_version)
1985-
self.source_hash = manager.fscache.md5(path)
1986+
self.source_hash = manager.fscache.hash_digest(path)
19861987
except IOError as ioerr:
19871988
# ioerr.strerror differs for os.stat failures between Windows and
19881989
# other systems, but os.strerror(ioerr.errno) does not, so we use that.

mypy/dmypy_server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ def initialize_fine_grained(self, sources: List[BuildSource],
423423
assert state.path is not None
424424
self.fswatcher.set_file_data(
425425
state.path,
426-
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))
426+
FileData(st_mtime=float(meta.mtime), st_size=meta.size, hash=meta.hash))
427427

428428
changed, removed = self.find_changed(sources)
429429

mypy/fscache.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@
2828
advantage of the benefits.
2929
"""
3030

31-
import hashlib
3231
import os
3332
import stat
3433
from typing import Dict, List, Set
34+
from mypy.util import hash_digest
3535

3636

3737
class FileSystemCache:
@@ -256,12 +256,11 @@ def read(self, path: str) -> bytes:
256256
self.read_error_cache[path] = err
257257
raise
258258

259-
md5hash = hashlib.md5(data).hexdigest()
260259
self.read_cache[path] = data
261-
self.hash_cache[path] = md5hash
260+
self.hash_cache[path] = hash_digest(data)
262261
return data
263262

264-
def md5(self, path: str) -> str:
263+
def hash_digest(self, path: str) -> str:
265264
if path not in self.hash_cache:
266265
self.read(path)
267266
return self.hash_cache[path]

mypy/fswatcher.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66

77
FileData = NamedTuple('FileData', [('st_mtime', float),
88
('st_size', int),
9-
('md5', str)])
9+
('hash', str)])
1010

1111

1212
class FileSystemWatcher:
1313
"""Watcher for file system changes among specific paths.
1414
1515
All file system access is performed using FileSystemCache. We
16-
detect changed files by stat()ing them all and comparing md5 hashes
16+
detect changed files by stat()ing them all and comparing hashes
1717
of potentially changed files. If a file has both size and mtime
1818
unmodified, the file is assumed to be unchanged.
1919
@@ -54,8 +54,8 @@ def remove_watched_paths(self, paths: Iterable[str]) -> None:
5454

5555
def _update(self, path: str) -> None:
5656
st = self.fs.stat(path)
57-
md5 = self.fs.md5(path)
58-
self._file_data[path] = FileData(st.st_mtime, st.st_size, md5)
57+
hash_digest = self.fs.hash_digest(path)
58+
self._file_data[path] = FileData(st.st_mtime, st.st_size, hash_digest)
5959

6060
def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
6161
changed = set()
@@ -76,10 +76,10 @@ def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
7676
# Round mtimes down, to match the mtimes we write to meta files
7777
elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime):
7878
# Only look for changes if size or mtime has changed as an
79-
# optimization, since calculating md5 is expensive.
80-
new_md5 = self.fs.md5(path)
79+
# optimization, since calculating hash is expensive.
80+
new_hash = self.fs.hash_digest(path)
8181
self._update(path)
82-
if st.st_size != old.st_size or new_md5 != old.md5:
82+
if st.st_size != old.st_size or new_hash != old.hash:
8383
# Changed file.
8484
changed.add(path)
8585
return changed

mypy/util.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
import subprocess
66
import sys
7-
import os
7+
import hashlib
88

99
from typing import (
1010
TypeVar, List, Tuple, Optional, Dict, Sequence, Iterable, Container, IO, Callable
@@ -469,6 +469,16 @@ def soft_wrap(msg: str, max_len: int, first_offset: int,
469469
return padding.join(lines)
470470

471471

472+
def hash_digest(data: bytes) -> str:
473+
"""Compute a hash digest of some data.
474+
475+
We use a cryptographic hash because we want a low probability of
476+
accidental collision, but we don't really care about any of the
477+
cryptographic properties.
478+
"""
479+
return hashlib.md5(data).hexdigest()
480+
481+
472482
class FancyFormatter:
473483
"""Apply color and bold font to terminal output.
474484

mypyc/emitmodule.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
# single module and it should be renamed.
55

66
import os
7-
import hashlib
87
import json
98
from collections import OrderedDict
109
from typing import List, Tuple, Dict, Iterable, Set, TypeVar, Optional
@@ -18,6 +17,7 @@
1817
from mypy.options import Options
1918
from mypy.plugin import Plugin, ReportConfigContext
2019
from mypy.fscache import FileSystemCache
20+
from mypy.util import hash_digest
2121

2222
from mypyc import genops
2323
from mypyc.common import (
@@ -144,7 +144,7 @@ def report_config_data(
144144
contents = f.read()
145145
except FileNotFoundError:
146146
return None
147-
real_hash = hashlib.md5(contents).hexdigest()
147+
real_hash = hash_digest(contents)
148148
if hash != real_hash:
149149
return None
150150

0 commit comments

Comments (0)