Skip to content

Commit 2e5d8a9

Browse files
authored
gh-99108: Release the GIL around hashlib built-in computation (#104675)
This matches the GIL-releasing behavior of our existing `_hashopenssl` module, extending it to the HACL* built-ins. It also adds comments that better describe the purpose of the ENTER/LEAVE macros and explain the lock strategy in both existing and new code.
1 parent 988c1f6 commit 2e5d8a9

File tree

7 files changed

+207
-15
lines changed

7 files changed

+207
-15
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
We now release the GIL around built-in :mod:`hashlib` computations of
2+
reasonable size for the SHA families and MD5 hash functions, matching
3+
what our OpenSSL-backed hash computations already do.

Modules/_hashopenssl.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,16 @@ get_hashlib_state(PyObject *module)
227227
typedef struct {
228228
PyObject_HEAD
229229
EVP_MD_CTX *ctx; /* OpenSSL message digest context */
230+
// Prevents undefined behavior via multiple threads entering the C API.
231+
// The lock will be NULL before threaded access has been enabled.
230232
PyThread_type_lock lock; /* OpenSSL context lock */
231233
} EVPobject;
232234

233235
typedef struct {
234236
PyObject_HEAD
235237
HMAC_CTX *ctx; /* OpenSSL hmac context */
238+
// Prevents undefined behavior via multiple threads entering the C API.
239+
// The lock will be NULL before threaded access has been enabled.
236240
PyThread_type_lock lock; /* HMAC context lock */
237241
} HMACobject;
238242

@@ -896,6 +900,8 @@ py_evp_fromname(PyObject *module, const char *digestname, PyObject *data_obj,
896900

897901
if (view.buf && view.len) {
898902
if (view.len >= HASHLIB_GIL_MINSIZE) {
903+
/* We do not initialize self->lock here as this is the constructor
904+
* where it is not yet possible to have concurrent access. */
899905
Py_BEGIN_ALLOW_THREADS
900906
result = EVP_hash(self, view.buf, view.len);
901907
Py_END_ALLOW_THREADS

Modules/hashlib.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@
3737
* LEAVE_HASHLIB block or explicitly acquire and release the lock inside
3838
* a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for
3939
* an operation.
40+
*
41+
* These only drop the GIL if the lock acquisition itself is likely to
42+
* block. Thus a non-blocking acquire gates the GIL release for a
43+
* blocking lock acquisition. The intent of these macros is to surround
44+
* the assumed always "fast" operations that you aren't releasing the
45+
* GIL around. Otherwise use code similar to what you see in hash
46+
* function update() methods.
4047
*/
4148

4249
#include "pythread.h"
@@ -53,7 +60,7 @@
5360
PyThread_release_lock((obj)->lock); \
5461
}
5562

56-
/* TODO(gps): We should probably make this a module or EVPobject attribute
63+
/* TODO(gpshead): We should make this a module or class attribute
5764
* to allow the user to optimize based on the platform they're using. */
5865
#define HASHLIB_GIL_MINSIZE 2048
5966

Modules/md5module.c

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ typedef long long MD5_INT64; /* 64-bit integer */
4949

5050
typedef struct {
5151
PyObject_HEAD
52-
52+
// Prevents undefined behavior via multiple threads entering the C API.
53+
// The lock will be NULL before threaded access has been enabled.
54+
PyThread_type_lock lock;
5355
Hacl_Streaming_MD5_state *hash_state;
5456
} MD5object;
5557

@@ -72,6 +74,7 @@ static MD5object *
7274
newMD5object(MD5State * st)
7375
{
7476
MD5object *md5 = (MD5object *)PyObject_GC_New(MD5object, st->md5_type);
77+
md5->lock = NULL;
7578
PyObject_GC_Track(md5);
7679
return md5;
7780
}
@@ -88,6 +91,9 @@ static void
8891
MD5_dealloc(MD5object *ptr)
8992
{
9093
Hacl_Streaming_MD5_legacy_free(ptr->hash_state);
94+
if (ptr->lock != NULL) {
95+
PyThread_free_lock(ptr->lock);
96+
}
9197
PyTypeObject *tp = Py_TYPE(ptr);
9298
PyObject_GC_UnTrack(ptr);
9399
PyObject_GC_Del(ptr);
@@ -115,7 +121,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls)
115121
if ((newobj = newMD5object(st))==NULL)
116122
return NULL;
117123

124+
ENTER_HASHLIB(self);
118125
newobj->hash_state = Hacl_Streaming_MD5_legacy_copy(self->hash_state);
126+
LEAVE_HASHLIB(self);
119127
return (PyObject *)newobj;
120128
}
121129

@@ -130,7 +138,9 @@ MD5Type_digest_impl(MD5object *self)
130138
/*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
131139
{
132140
unsigned char digest[MD5_DIGESTSIZE];
141+
ENTER_HASHLIB(self);
133142
Hacl_Streaming_MD5_legacy_finish(self->hash_state, digest);
143+
LEAVE_HASHLIB(self);
134144
return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
135145
}
136146

@@ -145,7 +155,9 @@ MD5Type_hexdigest_impl(MD5object *self)
145155
/*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
146156
{
147157
unsigned char digest[MD5_DIGESTSIZE];
158+
ENTER_HASHLIB(self);
148159
Hacl_Streaming_MD5_legacy_finish(self->hash_state, digest);
160+
LEAVE_HASHLIB(self);
149161
return _Py_strhex((const char*)digest, MD5_DIGESTSIZE);
150162
}
151163

@@ -177,7 +189,18 @@ MD5Type_update(MD5object *self, PyObject *obj)
177189

178190
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
179191

180-
update(self->hash_state, buf.buf, buf.len);
192+
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
193+
self->lock = PyThread_allocate_lock();
194+
}
195+
if (self->lock != NULL) {
196+
Py_BEGIN_ALLOW_THREADS
197+
PyThread_acquire_lock(self->lock, 1);
198+
update(self->hash_state, buf.buf, buf.len);
199+
PyThread_release_lock(self->lock);
200+
Py_END_ALLOW_THREADS
201+
} else {
202+
update(self->hash_state, buf.buf, buf.len);
203+
}
181204

182205
PyBuffer_Release(&buf);
183206
Py_RETURN_NONE;
@@ -279,7 +302,15 @@ _md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity)
279302
return NULL;
280303
}
281304
if (string) {
282-
update(new->hash_state, buf.buf, buf.len);
305+
if (buf.len >= HASHLIB_GIL_MINSIZE) {
306+
/* We do not initialize new->lock here as this is the constructor
307+
* where it is not yet possible to have concurrent access. */
308+
Py_BEGIN_ALLOW_THREADS
309+
update(new->hash_state, buf.buf, buf.len);
310+
Py_END_ALLOW_THREADS
311+
} else {
312+
update(new->hash_state, buf.buf, buf.len);
313+
}
283314
PyBuffer_Release(&buf);
284315
}
285316

Modules/sha1module.c

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ typedef long long SHA1_INT64; /* 64-bit integer */
4848

4949
typedef struct {
5050
PyObject_HEAD
51-
51+
// Prevents undefined behavior via multiple threads entering the C API.
52+
// The lock will be NULL before threaded access has been enabled.
53+
PyThread_type_lock lock;
5254
Hacl_Streaming_SHA1_state *hash_state;
5355
} SHA1object;
5456

@@ -71,6 +73,7 @@ static SHA1object *
7173
newSHA1object(SHA1State *st)
7274
{
7375
SHA1object *sha = (SHA1object *)PyObject_GC_New(SHA1object, st->sha1_type);
76+
sha->lock = NULL;
7477
PyObject_GC_Track(sha);
7578
return sha;
7679
}
@@ -88,6 +91,9 @@ static void
8891
SHA1_dealloc(SHA1object *ptr)
8992
{
9093
Hacl_Streaming_SHA1_legacy_free(ptr->hash_state);
94+
if (ptr->lock != NULL) {
95+
PyThread_free_lock(ptr->lock);
96+
}
9197
PyTypeObject *tp = Py_TYPE(ptr);
9298
PyObject_GC_UnTrack(ptr);
9399
PyObject_GC_Del(ptr);
@@ -115,7 +121,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls)
115121
if ((newobj = newSHA1object(st)) == NULL)
116122
return NULL;
117123

124+
ENTER_HASHLIB(self);
118125
newobj->hash_state = Hacl_Streaming_SHA1_legacy_copy(self->hash_state);
126+
LEAVE_HASHLIB(self);
119127
return (PyObject *)newobj;
120128
}
121129

@@ -130,7 +138,9 @@ SHA1Type_digest_impl(SHA1object *self)
130138
/*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/
131139
{
132140
unsigned char digest[SHA1_DIGESTSIZE];
141+
ENTER_HASHLIB(self);
133142
Hacl_Streaming_SHA1_legacy_finish(self->hash_state, digest);
143+
LEAVE_HASHLIB(self);
134144
return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE);
135145
}
136146

@@ -145,7 +155,9 @@ SHA1Type_hexdigest_impl(SHA1object *self)
145155
/*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/
146156
{
147157
unsigned char digest[SHA1_DIGESTSIZE];
158+
ENTER_HASHLIB(self);
148159
Hacl_Streaming_SHA1_legacy_finish(self->hash_state, digest);
160+
LEAVE_HASHLIB(self);
149161
return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE);
150162
}
151163

@@ -177,7 +189,18 @@ SHA1Type_update(SHA1object *self, PyObject *obj)
177189

178190
GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
179191

180-
update(self->hash_state, buf.buf, buf.len);
192+
if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
193+
self->lock = PyThread_allocate_lock();
194+
}
195+
if (self->lock != NULL) {
196+
Py_BEGIN_ALLOW_THREADS
197+
PyThread_acquire_lock(self->lock, 1);
198+
update(self->hash_state, buf.buf, buf.len);
199+
PyThread_release_lock(self->lock);
200+
Py_END_ALLOW_THREADS
201+
} else {
202+
update(self->hash_state, buf.buf, buf.len);
203+
}
181204

182205
PyBuffer_Release(&buf);
183206
Py_RETURN_NONE;
@@ -279,7 +302,15 @@ _sha1_sha1_impl(PyObject *module, PyObject *string, int usedforsecurity)
279302
return NULL;
280303
}
281304
if (string) {
282-
update(new->hash_state, buf.buf, buf.len);
305+
if (buf.len >= HASHLIB_GIL_MINSIZE) {
306+
/* We do not initialize new->lock here as this is the constructor
307+
* where it is not yet possible to have concurrent access. */
308+
Py_BEGIN_ALLOW_THREADS
309+
update(new->hash_state, buf.buf, buf.len);
310+
Py_END_ALLOW_THREADS
311+
} else {
312+
update(new->hash_state, buf.buf, buf.len);
313+
}
283314
PyBuffer_Release(&buf);
284315
}
285316

0 commit comments

Comments
 (0)