|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import hashlib |
| 4 | +import sys |
| 5 | +from contextlib import suppress |
| 6 | +from pathlib import Path |
| 7 | +from typing import Any |
| 8 | + |
| 9 | + |
| 10 | +if sys.version_info >= (3, 11): |
| 11 | + from hashlib import file_digest |
| 12 | +else: |
| 13 | + # This tuple and __get_builtin_constructor() must be modified if a new |
| 14 | + # always available algorithm is added. |
| 15 | + __always_supported = ( |
| 16 | + "md5", |
| 17 | + "sha1", |
| 18 | + "sha224", |
| 19 | + "sha256", |
| 20 | + "sha384", |
| 21 | + "sha512", |
| 22 | + "blake2b", |
| 23 | + "blake2s", |
| 24 | + "sha3_224", |
| 25 | + "sha3_256", |
| 26 | + "sha3_384", |
| 27 | + "sha3_512", |
| 28 | + "shake_128", |
| 29 | + "shake_256", |
| 30 | + ) |
| 31 | + |
| 32 | + algorithms_guaranteed = set(__always_supported) |
| 33 | + algorithms_available = set(__always_supported) |
| 34 | + |
| 35 | + __all__ = __always_supported + ( |
| 36 | + "new", |
| 37 | + "algorithms_guaranteed", |
| 38 | + "algorithms_available", |
| 39 | + "file_digest", |
| 40 | + ) |
| 41 | + |
| 42 | + __builtin_constructor_cache = {} |
| 43 | + |
| 44 | + # Prefer our blake2 implementation |
| 45 | + # OpenSSL 1.1.0 comes with a limited implementation of blake2b/s. The OpenSSL |
| 46 | + # implementations neither support keyed blake2 (blake2 MAC) nor advanced |
| 47 | + # features like salt, personalization, or tree hashing. OpenSSL hash-only |
| 48 | + # variants are available as 'blake2b512' and 'blake2s256', though. |
| 49 | + __block_openssl_constructor = { |
| 50 | + "blake2b", |
| 51 | + "blake2s", |
| 52 | + } |
| 53 | + |
def __get_builtin_constructor(name):
    """Return the built-in (non-OpenSSL) constructor for the hash *name*.

    Resolved constructors are stored in ``__builtin_constructor_cache`` so the
    extension module is imported only once per algorithm family. Upper-case
    aliases are accepted for MD5, SHA-1 and the SHA-2 family.

    Raises ``ValueError`` if *name* is unknown or if the extension module that
    implements it cannot be imported.
    """
    cache = __builtin_constructor_cache
    constructor = cache.get(name)
    if constructor is not None:
        return constructor

    # A missing extension module is not an error here; it simply leaves the
    # cache unpopulated and ends in the ValueError below.
    with suppress(ImportError):
        if name in {"SHA1", "sha1"}:
            import _sha1

            cache["SHA1"] = cache["sha1"] = _sha1.sha1
        elif name in {"MD5", "md5"}:
            import _md5

            cache["MD5"] = cache["md5"] = _md5.md5
        elif name in {"SHA256", "sha256", "SHA224", "sha224"}:
            try:
                import _sha2
            except ImportError:
                # Interpreters without the combined ``_sha2`` module provide
                # SHA-224/256 in ``_sha256``.
                import _sha256 as _sha2
            cache["SHA224"] = cache["sha224"] = _sha2.sha224
            cache["SHA256"] = cache["sha256"] = _sha2.sha256
        elif name in {"SHA512", "sha512", "SHA384", "sha384"}:
            try:
                import _sha2
            except ImportError:
                # SHA-384/512 live in ``_sha512`` on those interpreters;
                # ``_sha256`` has no sha384/sha512 attributes, so falling back
                # to it would raise AttributeError instead of resolving.
                import _sha512 as _sha2
            cache["SHA384"] = cache["sha384"] = _sha2.sha384
            cache["SHA512"] = cache["sha512"] = _sha2.sha512
        elif name in {"blake2b", "blake2s"}:
            import _blake2

            cache["blake2b"] = _blake2.blake2b
            cache["blake2s"] = _blake2.blake2s
        elif name in {"sha3_224", "sha3_256", "sha3_384", "sha3_512"}:
            import _sha3

            cache["sha3_224"] = _sha3.sha3_224
            cache["sha3_256"] = _sha3.sha3_256
            cache["sha3_384"] = _sha3.sha3_384
            cache["sha3_512"] = _sha3.sha3_512
        elif name in {"shake_128", "shake_256"}:
            import _sha3

            cache["shake_128"] = _sha3.shake_128
            cache["shake_256"] = _sha3.shake_256

    constructor = cache.get(name)
    if constructor is not None:
        return constructor

    raise ValueError("unsupported hash type " + name)
| 106 | + |
def __get_openssl_constructor(name):
    """Return the OpenSSL constructor for *name*, or the built-in one.

    The built-in constructor is returned when *name* is on the OpenSSL block
    list, when ``_hashlib`` has no ``openssl_<name>`` attribute, or when a
    probe call of that attribute raises ``ValueError``.
    """
    if name not in __block_openssl_constructor:
        try:
            # MD5, SHA1, and SHA2 are in all supported OpenSSL versions;
            # SHA3/shake are available in OpenSSL 1.1.1+.
            candidate = getattr(_hashlib, "openssl_" + name)
            # The function may be defined while the hash itself is not
            # available, so probe it once and let the C module raise
            # ValueError (bpo#40695).
            candidate(usedforsecurity=False)
        except (AttributeError, ValueError):
            return __get_builtin_constructor(name)
        else:
            # Hand back the C function itself (very fast).
            return candidate

    # Blocked names always use the built-in blake2 implementation.
    return __get_builtin_constructor(name)
| 123 | + |
def __py_new(name, data=b"", **kwargs):
    """new(name, data=b'', **kwargs) - Build a hashing object for the named
    algorithm from the built-in constructors only.

    *data*, when given, must be a bytes-like object and is used to initialize
    the hash; extra keyword arguments are forwarded to the constructor.
    """
    constructor = __get_builtin_constructor(name)
    return constructor(data, **kwargs)
| 130 | + |
def __hash_new(name, data=b"", **kwargs):
    """new(name, data=b'') - Build a hashing object for the named algorithm,
    preferring OpenSSL and falling back to the built-in constructors.

    *data*, when given, must be a bytes-like object and is used to initialize
    the hash.
    """
    if name not in __block_openssl_constructor:
        try:
            return _hashlib.new(name, data, **kwargs)
        except ValueError:
            # The _hashlib module (OpenSSL) does not support the named hash,
            # so try the built-in implementations instead. This allows for
            # SHA224/256 and SHA384/512 support even though the OpenSSL
            # library prior to 0.9.8 doesn't provide them.
            # Note that **kwargs is not forwarded on this path.
            return __get_builtin_constructor(name)(data)

    # Blocked names (blake2) always use the built-in implementation.
    return __get_builtin_constructor(name)(data, **kwargs)
| 146 | + |
# Wire up ``new`` and ``__get_hash`` depending on whether the OpenSSL-backed
# ``_hashlib`` extension can be imported.
try:
    import _hashlib
except ImportError:
    _hashlib = None
    new = __py_new
    __get_hash = __get_builtin_constructor
else:
    new = __hash_new
    __get_hash = __get_openssl_constructor
    algorithms_available = algorithms_available.union(
        _hashlib.openssl_md_meth_names
    )

try:
    # OpenSSL's PKCS5_PBKDF2_HMAC requires OpenSSL 1.0+ with HMAC and SHA
    from _hashlib import pbkdf2_hmac
except ImportError:
    pass
else:
    __all__ += ("pbkdf2_hmac",)

try:
    # OpenSSL's scrypt requires OpenSSL 1.1+
    # (imported when available, but not added to __all__).
    from _hashlib import scrypt
except ImportError:
    pass
| 169 | + |
def file_digest(fileobj, digest, /, *, _bufsize=2**18):
    """Hash the contents of a file-like object. Returns a digest object.

    *fileobj* must be a file-like object opened for reading in binary mode.
    Objects exposing ``getbuffer()`` (such as io.BytesIO) are hashed in one
    step from that buffer; anything else is read in chunks of *_bufsize*
    bytes through ``readinto()``.

    *digest* must either be a hash algorithm name as a *str*, a hash
    constructor, or a callable that returns a hash object.

    Raises ``ValueError`` if *fileobj* is not readable in binary mode and
    ``BlockingIOError`` if ``readinto()`` reports that no data is currently
    available (a non-blocking stream).
    """
    # On Linux we could use AF_ALG sockets and sendfile() to achieve zero-copy
    # hashing with hardware acceleration.
    digestobj = new(digest) if isinstance(digest, str) else digest()

    if hasattr(fileobj, "getbuffer"):
        # io.BytesIO object, use zero-copy buffer
        digestobj.update(fileobj.getbuffer())
        return digestobj

    # Only binary files implement readinto().
    if not (
        hasattr(fileobj, "readinto")
        and hasattr(fileobj, "readable")
        and fileobj.readable()
    ):
        raise ValueError(
            f"'{fileobj!r}' is not a file-like object in binary reading mode."
        )

    # binary file, socket.SocketIO object
    # Note: socket I/O uses different syscalls than file I/O.
    buf = bytearray(_bufsize)  # Reusable buffer to reduce allocations.
    view = memoryview(buf)
    while True:
        size = fileobj.readinto(buf)
        if size is None:
            # A non-blocking stream with nothing to read returns None.
            # Slicing with None would feed the whole stale buffer into the
            # digest, so fail loudly instead of corrupting the result.
            raise BlockingIOError("I/O operation would block.")
        if size == 0:
            break  # EOF
        digestobj.update(view[:size])

    return digestobj
| 214 | + |
| 215 | + |
def hash_value(value: Any) -> int | str:
    """Return a hash for *value*.

    Paths are converted to strings, strings are encoded to bytes, and bytes
    are hashed with SHA-256, returning the hex digest as a string. This avoids
    the built-in ``hash()`` for these types, whose results for strings and
    bytes are salted. Every other value is passed to ``hash()`` and yields an
    integer.

    """
    text = str(value) if isinstance(value, Path) else value
    raw = text.encode() if isinstance(text, str) else text
    if not isinstance(raw, bytes):
        return hash(raw)
    return hashlib.sha256(raw).hexdigest()
0 commit comments