Skip to content

Commit 4916d67

Browse files
committed
gh-108314: Add PyDict_ContainsString() function
* The new function is not part of the limited C API.
* Use PyDict_ContainsString() in pylifecycle.c and pythonrun.c.
1 parent 154477b commit 4916d67

File tree

9 files changed

+91
-21
lines changed

9 files changed

+91
-21
lines changed

Doc/c-api/dict.rst

+16-4
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ Dictionary Objects
5555
This is equivalent to the Python expression ``key in p``.
5656
5757
58+
.. c:function:: int PyDict_ContainsString(PyObject *p, const char *key)
59+
60+
This is the same as :c:func:`PyDict_Contains`, but *key* is specified as a
61+
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
62+
:c:expr:`PyObject*`.
63+
64+
.. versionadded:: 3.13
65+
66+
5867
.. c:function:: PyObject* PyDict_Copy(PyObject *p)
5968
6069
Return a new dictionary that contains the same key-value pairs as *p*.
@@ -73,7 +82,7 @@ Dictionary Objects
7382
.. index:: single: PyUnicode_FromString()
7483
7584
Insert *val* into the dictionary *p* using *key* as a key. *key* should
76-
be a :c:expr:`const char*`. The key object is created using
85+
be a :c:expr:`const char*` UTF-8 encoded bytes string. The key object is created using
7786
``PyUnicode_FromString(key)``. Return ``0`` on success or ``-1`` on
7887
failure. This function *does not* steal a reference to *val*.
7988
@@ -88,7 +97,8 @@ Dictionary Objects
8897
8998
.. c:function:: int PyDict_DelItemString(PyObject *p, const char *key)
9099
91-
Remove the entry in dictionary *p* which has a key specified by the string *key*.
100+
Remove the entry in dictionary *p* which has a key specified by the UTF-8
101+
encoded bytes string *key*.
92102
If *key* is not in the dictionary, :exc:`KeyError` is raised.
93103
Return ``0`` on success or ``-1`` on failure.
94104
@@ -136,7 +146,8 @@ Dictionary Objects
136146
.. c:function:: PyObject* PyDict_GetItemString(PyObject *p, const char *key)
137147
138148
This is the same as :c:func:`PyDict_GetItem`, but *key* is specified as a
139-
:c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
149+
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
150+
:c:expr:`PyObject*`.
140151
141152
.. note::
142153
@@ -150,7 +161,8 @@ Dictionary Objects
150161
.. c:function:: int PyDict_GetItemStringRef(PyObject *p, const char *key, PyObject **result)
151162
152163
Similar to :c:func:`PyDict_GetItemRef`, but *key* is specified as a
153-
:c:expr:`const char*`, rather than a :c:expr:`PyObject*`.
164+
:c:expr:`const char*` UTF-8 encoded bytes string, rather than a
165+
:c:expr:`PyObject*`.
154166
155167
.. versionadded:: 3.13
156168

Doc/whatsnew/3.13.rst

+5
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,11 @@ New Features
862862
not needed.
863863
(Contributed by Victor Stinner in :gh:`106004`.)
864864

865+
* Add :c:func:`PyDict_ContainsString` function: same as
866+
:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
867+
UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
868+
(Contributed by Victor Stinner in :gh:`108314`.)
869+
865870
* Add :c:func:`Py_IsFinalizing` function: check if the main Python interpreter is
866871
:term:`shutting down <interpreter shutdown>`.
867872
(Contributed by Victor Stinner in :gh:`108014`.)

Include/cpython/dictobject.h

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) {
5555
}
5656
#define PyDict_GET_SIZE(op) PyDict_GET_SIZE(_PyObject_CAST(op))
5757

58+
PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key);
5859
PyAPI_FUNC(int) _PyDict_ContainsId(PyObject *, _Py_Identifier *);
5960

6061
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);

Lib/test/test_capi/test_dict.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99

1010
NULL = None
11+
INVALID_UTF8 = b'\xff'
1112

1213
class DictSubclass(dict):
1314
def __getitem__(self, key):
@@ -137,7 +138,7 @@ def test_dict_getitemstring(self):
137138
self.assertEqual(getitemstring(dct2, b'a'), 1)
138139
self.assertIs(getitemstring(dct2, b'b'), KeyError)
139140

140-
self.assertIs(getitemstring({}, b'\xff'), KeyError)
141+
self.assertIs(getitemstring({}, INVALID_UTF8), KeyError)
141142
self.assertIs(getitemstring(42, b'a'), KeyError)
142143
self.assertIs(getitemstring([], b'a'), KeyError)
143144
# CRASHES getitemstring({}, NULL)
@@ -173,7 +174,7 @@ def test_dict_getitemstringref(self):
173174
self.assertIs(getitemstring(dct2, b'b'), KeyError)
174175

175176
self.assertRaises(SystemError, getitemstring, 42, b'a')
176-
self.assertRaises(UnicodeDecodeError, getitemstring, {}, b'\xff')
177+
self.assertRaises(UnicodeDecodeError, getitemstring, {}, INVALID_UTF8)
177178
self.assertRaises(SystemError, getitemstring, [], b'a')
178179
# CRASHES getitemstring({}, NULL)
179180
# CRASHES getitemstring(NULL, b'a')
@@ -213,6 +214,21 @@ def test_dict_contains(self):
213214
# CRASHES contains(42, 'a')
214215
# CRASHES contains(NULL, 'a')
215216

217+
def test_dict_contains_string(self):
218+
contains_string = _testcapi.dict_containsstring
219+
dct = {'a': 1, '\U0001f40d': 2}
220+
self.assertTrue(contains_string(dct, b'a'))
221+
self.assertFalse(contains_string(dct, b'b'))
222+
self.assertTrue(contains_string(dct, '\U0001f40d'.encode()))
223+
self.assertRaises(UnicodeDecodeError, contains_string, dct, INVALID_UTF8)
224+
225+
dct2 = DictSubclass(dct)
226+
self.assertTrue(contains_string(dct2, b'a'))
227+
self.assertFalse(contains_string(dct2, b'b'))
228+
229+
# CRASHES contains_string({}, NULL)
230+
# CRASHES contains_string(NULL, b'a')
231+
216232
def test_dict_setitem(self):
217233
setitem = _testcapi.dict_setitem
218234
dct = {}
@@ -245,7 +261,7 @@ def test_dict_setitemstring(self):
245261
setitemstring(dct2, b'a', 5)
246262
self.assertEqual(dct2, {'a': 5})
247263

248-
self.assertRaises(UnicodeDecodeError, setitemstring, {}, b'\xff', 5)
264+
self.assertRaises(UnicodeDecodeError, setitemstring, {}, INVALID_UTF8, 5)
249265
self.assertRaises(SystemError, setitemstring, UserDict(), b'a', 5)
250266
self.assertRaises(SystemError, setitemstring, 42, b'a', 5)
251267
# CRASHES setitemstring({}, NULL, 5)
@@ -287,7 +303,7 @@ def test_dict_delitemstring(self):
287303
self.assertEqual(dct2, {'c': 2})
288304
self.assertRaises(KeyError, delitemstring, dct2, b'b')
289305

290-
self.assertRaises(UnicodeDecodeError, delitemstring, {}, b'\xff')
306+
self.assertRaises(UnicodeDecodeError, delitemstring, {}, INVALID_UTF8)
291307
self.assertRaises(SystemError, delitemstring, UserDict({'a': 1}), b'a')
292308
self.assertRaises(SystemError, delitemstring, 42, b'a')
293309
# CRASHES delitemstring({}, NULL)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Add :c:func:`PyDict_ContainsString` function: same as
2+
:c:func:`PyDict_Contains`, but *key* is specified as a :c:expr:`const char*`
3+
UTF-8 encoded bytes string, rather than a :c:expr:`PyObject*`.
4+
Patch by Victor Stinner.

Modules/_testcapi/dict.c

+14
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,19 @@ dict_contains(PyObject *self, PyObject *args)
7474
RETURN_INT(PyDict_Contains(obj, key));
7575
}
7676

77+
static PyObject *
78+
dict_containsstring(PyObject *self, PyObject *args)
79+
{
80+
PyObject *obj;
81+
const char *key;
82+
Py_ssize_t size;
83+
if (!PyArg_ParseTuple(args, "Oz#", &obj, &key, &size)) {
84+
return NULL;
85+
}
86+
NULLABLE(obj);
87+
RETURN_INT(PyDict_ContainsString(obj, key));
88+
}
89+
7790
static PyObject *
7891
dict_size(PyObject *self, PyObject *obj)
7992
{
@@ -349,6 +362,7 @@ static PyMethodDef test_methods[] = {
349362
{"dict_getitemref", dict_getitemref, METH_VARARGS},
350363
{"dict_getitemstringref", dict_getitemstringref, METH_VARARGS},
351364
{"dict_contains", dict_contains, METH_VARARGS},
365+
{"dict_containsstring", dict_containsstring, METH_VARARGS},
352366
{"dict_setitem", dict_setitem, METH_VARARGS},
353367
{"dict_setitemstring", dict_setitemstring, METH_VARARGS},
354368
{"dict_delitem", dict_delitem, METH_VARARGS},

Objects/dictobject.c

+12
Original file line numberDiff line numberDiff line change
@@ -3741,6 +3741,18 @@ PyDict_Contains(PyObject *op, PyObject *key)
37413741
return (ix != DKIX_EMPTY && value != NULL);
37423742
}
37433743

3744+
int
3745+
PyDict_ContainsString(PyObject *op, const char *key)
3746+
{
3747+
PyObject *key_obj = PyUnicode_FromString(key);
3748+
if (key_obj == NULL) {
3749+
return -1;
3750+
}
3751+
int res = PyDict_Contains(op, key_obj);
3752+
Py_DECREF(key_obj);
3753+
return res;
3754+
}
3755+
37443756
/* Internal version of PyDict_Contains used when the hash value is already known */
37453757
int
37463758
_PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)

Python/pylifecycle.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -2208,10 +2208,11 @@ add_main_module(PyInterpreterState *interp)
22082208
}
22092209
Py_DECREF(ann_dict);
22102210

2211-
if (_PyDict_GetItemStringWithError(d, "__builtins__") == NULL) {
2212-
if (PyErr_Occurred()) {
2213-
return _PyStatus_ERR("Failed to test __main__.__builtins__");
2214-
}
2211+
int has_builtins = PyDict_ContainsString(d, "__builtins__");
2212+
if (has_builtins < 0) {
2213+
return _PyStatus_ERR("Failed to test __main__.__builtins__");
2214+
}
2215+
if (!has_builtins) {
22152216
PyObject *bimod = PyImport_ImportModule("builtins");
22162217
if (bimod == NULL) {
22172218
return _PyStatus_ERR("Failed to retrieve builtins module");

Python/pythonrun.c

+14-9
Original file line numberDiff line numberDiff line change
@@ -413,10 +413,11 @@ _PyRun_SimpleFileObject(FILE *fp, PyObject *filename, int closeit,
413413
PyObject *dict = PyModule_GetDict(main_module); // borrowed ref
414414

415415
int set_file_name = 0;
416-
if (_PyDict_GetItemStringWithError(dict, "__file__") == NULL) {
417-
if (PyErr_Occurred()) {
418-
goto done;
419-
}
416+
int has_file = PyDict_ContainsString(dict, "__file__");
417+
if (has_file < 0) {
418+
goto done;
419+
}
420+
if (!has_file) {
420421
if (PyDict_SetItemString(dict, "__file__", filename) < 0) {
421422
goto done;
422423
}
@@ -1713,13 +1714,17 @@ run_eval_code_obj(PyThreadState *tstate, PyCodeObject *co, PyObject *globals, Py
17131714
_PyRuntime.signals.unhandled_keyboard_interrupt = 0;
17141715

17151716
/* Set globals['__builtins__'] if it doesn't exist */
1716-
if (globals != NULL && _PyDict_GetItemStringWithError(globals, "__builtins__") == NULL) {
1717-
if (PyErr_Occurred() ||
1718-
PyDict_SetItemString(globals, "__builtins__",
1719-
tstate->interp->builtins) < 0)
1720-
{
1717+
if (globals != NULL) {
1718+
int has_builtins = PyDict_ContainsString(globals, "__builtins__");
1719+
if (has_builtins < 0) {
17211720
return NULL;
17221721
}
1722+
if (!has_builtins) {
1723+
if (PyDict_SetItemString(globals, "__builtins__",
1724+
tstate->interp->builtins) < 0) {
1725+
return NULL;
1726+
}
1727+
}
17231728
}
17241729

17251730
v = PyEval_EvalCode((PyObject*)co, globals, locals);

0 commit comments

Comments
 (0)