Skip to content

Commit 059b5ba

Browse files
authored
gh-85858: Remove PyUnicode_InternImmortal() function (#92579)
Remove the PyUnicode_InternImmortal() function and the SSTATE_INTERNED_IMMORTAL macro from the C API. PyUnicode_InternImmortal() is no longer declared in the public headers, but it is still exported by the stable ABI as an ABI-only symbol. The PyASCIIObject.state.interned member is now a single bit, rather than 2 bits. Keep the SSTATE_NOT_INTERNED and SSTATE_INTERNED_MORTAL macros for backward compatibility, but no longer use them internally, since the interned member is now a single bit and so can only have two values (interned or not interned). Update the INTERNED_STATS statistics of _PyUnicode_ClearInterned(): it now reports the total length of all interned strings instead of separate mortal/immortal sizes.
1 parent f62ad4f commit 059b5ba

File tree

7 files changed

+27
-67
lines changed

7 files changed

+27
-67
lines changed

Doc/data/stable_abi.dat

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Doc/whatsnew/3.12.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,7 @@ Removed
174174
* :c:func:`PyUnicode_GET_SIZE`
175175
* :c:func:`PyUnicode_GetSize`
176176
* :c:func:`PyUnicode_GET_DATA_SIZE`
177+
178+
* Remove the ``PyUnicode_InternImmortal()`` function and the
179+
``SSTATE_INTERNED_IMMORTAL`` macro.
180+
(Contributed by Victor Stinner in :gh:`85858`.)

Include/cpython/unicodeobject.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,15 +98,9 @@ typedef struct {
9898
Py_ssize_t length; /* Number of code points in the string */
9999
Py_hash_t hash; /* Hash value; -1 if not set */
100100
struct {
101-
/*
102-
SSTATE_NOT_INTERNED (0)
103-
SSTATE_INTERNED_MORTAL (1)
104-
SSTATE_INTERNED_IMMORTAL (2)
105-
106-
If interned != SSTATE_NOT_INTERNED, the two references from the
107-
dictionary to this object are *not* counted in ob_refcnt.
108-
*/
109-
unsigned int interned:2;
101+
/* If interned is set, the two references from the
102+
dictionary to this object are *not* counted in ob_refcnt. */
103+
unsigned int interned:1;
110104
/* Character size:
111105
112106
- PyUnicode_1BYTE_KIND (1):
@@ -189,7 +183,6 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
189183
/* Interning state. */
190184
#define SSTATE_NOT_INTERNED 0
191185
#define SSTATE_INTERNED_MORTAL 1
192-
#define SSTATE_INTERNED_IMMORTAL 2
193186

194187
/* Use only if you know it's a string */
195188
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {

Include/unicodeobject.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,10 +256,6 @@ PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
256256
const char *u /* UTF-8 encoded string */
257257
);
258258

259-
// PyUnicode_InternImmortal() is deprecated since Python 3.10
260-
// and will be removed in Python 3.12. Use PyUnicode_InternInPlace() instead.
261-
Py_DEPRECATED(3.10) PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
262-
263259
/* --- wchar_t support for platforms which support it --------------------- */
264260

265261
#ifdef HAVE_WCHAR_H
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Remove the ``PyUnicode_InternImmortal()`` function and the
2+
``SSTATE_INTERNED_IMMORTAL`` macro. Patch by Victor Stinner.

Misc/stable_abi.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1563,6 +1563,7 @@
15631563
added = '3.2'
15641564
[function.PyUnicode_InternImmortal]
15651565
added = '3.2'
1566+
abi_only = true
15661567
[function.PyUnicode_InternInPlace]
15671568
added = '3.2'
15681569
[data.PyUnicode_Type]

Objects/unicodeobject.c

Lines changed: 17 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,13 +1516,8 @@ unicode_dealloc(PyObject *unicode)
15161516
}
15171517
#endif
15181518

1519-
switch (PyUnicode_CHECK_INTERNED(unicode)) {
1520-
case SSTATE_NOT_INTERNED:
1521-
break;
1522-
1523-
case SSTATE_INTERNED_MORTAL:
1524-
{
15251519
#ifdef INTERNED_STRINGS
1520+
if (PyUnicode_CHECK_INTERNED(unicode)) {
15261521
/* Revive the dead object temporarily. PyDict_DelItem() removes two
15271522
references (key and value) which were ignored by
15281523
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
@@ -1536,17 +1531,8 @@ unicode_dealloc(PyObject *unicode)
15361531
}
15371532
assert(Py_REFCNT(unicode) == 1);
15381533
Py_SET_REFCNT(unicode, 0);
1539-
#endif
1540-
break;
1541-
}
1542-
1543-
case SSTATE_INTERNED_IMMORTAL:
1544-
_PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
1545-
break;
1546-
1547-
default:
1548-
Py_UNREACHABLE();
15491534
}
1535+
#endif
15501536

15511537
if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
15521538
PyObject_Free(_PyUnicode_UTF8(unicode));
@@ -14674,31 +14660,22 @@ PyUnicode_InternInPlace(PyObject **p)
1467414660
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
1467514661
this. */
1467614662
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
14677-
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
14663+
_PyUnicode_STATE(s).interned = 1;
1467814664
#else
1467914665
// PyDict expects that interned strings have their hash
1468014666
// (PyASCIIObject.hash) already computed.
1468114667
(void)unicode_hash(s);
1468214668
#endif
1468314669
}
1468414670

14671+
// Function kept for the stable ABI.
14672+
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
1468514673
void
1468614674
PyUnicode_InternImmortal(PyObject **p)
1468714675
{
14688-
if (PyErr_WarnEx(PyExc_DeprecationWarning,
14689-
"PyUnicode_InternImmortal() is deprecated; "
14690-
"use PyUnicode_InternInPlace() instead", 1) < 0)
14691-
{
14692-
// The function has no return value, the exception cannot
14693-
// be reported to the caller, so just log it.
14694-
PyErr_WriteUnraisable(NULL);
14695-
}
14696-
1469714676
PyUnicode_InternInPlace(p);
14698-
if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
14699-
_PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL;
14700-
Py_INCREF(*p);
14701-
}
14677+
// Leak a reference on purpose
14678+
Py_INCREF(*p);
1470214679
}
1470314680

1470414681
PyObject *
@@ -14733,37 +14710,25 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1473314710
fprintf(stderr, "releasing %zd interned strings\n",
1473414711
PyDict_GET_SIZE(interned));
1473514712

14736-
Py_ssize_t immortal_size = 0, mortal_size = 0;
14713+
Py_ssize_t total_length = 0;
1473714714
#endif
1473814715
Py_ssize_t pos = 0;
1473914716
PyObject *s, *ignored_value;
1474014717
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
14741-
switch (PyUnicode_CHECK_INTERNED(s)) {
14742-
case SSTATE_INTERNED_IMMORTAL:
14743-
Py_SET_REFCNT(s, Py_REFCNT(s) + 1);
14744-
#ifdef INTERNED_STATS
14745-
immortal_size += PyUnicode_GET_LENGTH(s);
14746-
#endif
14747-
break;
14748-
case SSTATE_INTERNED_MORTAL:
14749-
// Restore the two references (key and value) ignored
14750-
// by PyUnicode_InternInPlace().
14751-
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
14718+
assert(PyUnicode_CHECK_INTERNED(s));
14719+
// Restore the two references (key and value) ignored
14720+
// by PyUnicode_InternInPlace().
14721+
Py_SET_REFCNT(s, Py_REFCNT(s) + 2);
1475214722
#ifdef INTERNED_STATS
14753-
mortal_size += PyUnicode_GET_LENGTH(s);
14723+
total_length += PyUnicode_GET_LENGTH(s);
1475414724
#endif
14755-
break;
14756-
case SSTATE_NOT_INTERNED:
14757-
/* fall through */
14758-
default:
14759-
Py_UNREACHABLE();
14760-
}
14761-
_PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
14725+
14726+
_PyUnicode_STATE(s).interned = 0;
1476214727
}
1476314728
#ifdef INTERNED_STATS
1476414729
fprintf(stderr,
14765-
"total size of all interned strings: %zd/%zd mortal/immortal\n",
14766-
mortal_size, immortal_size);
14730+
"total length of all interned strings: %zd characters\n",
14731+
total_length);
1476714732
#endif
1476814733

1476914734
PyDict_Clear(interned);

0 commit comments

Comments
 (0)