Skip to content

Commit f7eae0a

Browse files
[security] bpo-13617: Reject embedded null characters in wchar* strings. (#2302)
Based on patch by Victor Stinner. Add private C API function _PyUnicode_AsUnicode() which is similar to PyUnicode_AsUnicode(), but checks for null characters.
1 parent 592eda1 commit f7eae0a

22 files changed

+115
-23
lines changed

Include/unicodeobject.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -756,23 +756,27 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
756756
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
757757
#endif
758758

759+
#ifndef Py_LIMITED_API
759760
/* Return a read-only pointer to the Unicode object's internal
760761
Py_UNICODE buffer.
761762
If the wchar_t/Py_UNICODE representation is not yet available, this
762763
function will calculate it. */
763764

764-
#ifndef Py_LIMITED_API
765765
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
766766
PyObject *unicode /* Unicode object */
767767
) /* Py_DEPRECATED(3.3) */;
768-
#endif
768+
769+
/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
770+
contains null characters. */
771+
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
772+
PyObject *unicode /* Unicode object */
773+
);
769774

770775
/* Return a read-only pointer to the Unicode object's internal
771776
Py_UNICODE buffer and save the length at size.
772777
If the wchar_t/Py_UNICODE representation is not yet available, this
773778
function will calculate it. */
774779

775-
#ifndef Py_LIMITED_API
776780
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
777781
PyObject *unicode, /* Unicode object */
778782
Py_ssize_t *size /* location where to save the length */

Lib/ctypes/test/test_loading.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def test_load_library(self):
6262
windll["kernel32"].GetModuleHandleW
6363
windll.LoadLibrary("kernel32").GetModuleHandleW
6464
WinDLL("kernel32").GetModuleHandleW
65+
# embedded null character
66+
self.assertRaises(ValueError, windll.LoadLibrary, "kernel32\0")
6567

6668
@unittest.skipUnless(os.name == "nt",
6769
'test specific to Windows')

Lib/test/test_builtin.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ def test_import(self):
151151
self.assertRaises(TypeError, __import__, 1, 2, 3, 4)
152152
self.assertRaises(ValueError, __import__, '')
153153
self.assertRaises(TypeError, __import__, 'sys', name='sys')
154+
# embedded null character
155+
self.assertRaises(ModuleNotFoundError, __import__, 'string\x00')
154156

155157
def test_abs(self):
156158
# int
@@ -1010,6 +1012,10 @@ def test_open(self):
10101012
self.assertEqual(fp.read(300), 'XXX'*100)
10111013
self.assertEqual(fp.read(1000), 'YYY'*100)
10121014

1015+
# embedded null bytes and characters
1016+
self.assertRaises(ValueError, open, 'a\x00b')
1017+
self.assertRaises(ValueError, open, b'a\x00b')
1018+
10131019
def test_open_default_encoding(self):
10141020
old_environ = dict(os.environ)
10151021
try:

Lib/test/test_curses.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_window_funcs(self):
8181
win2 = curses.newwin(15,15, 5,5)
8282

8383
for meth in [stdscr.addch, stdscr.addstr]:
84-
for args in [('a'), ('a', curses.A_BOLD),
84+
for args in [('a',), ('a', curses.A_BOLD),
8585
(4,4, 'a'), (5,5, 'a', curses.A_BOLD)]:
8686
with self.subTest(meth=meth.__qualname__, args=args):
8787
meth(*args)
@@ -194,6 +194,15 @@ def test_window_funcs(self):
194194
self.assertRaises(ValueError, stdscr.instr, -2)
195195
self.assertRaises(ValueError, stdscr.instr, 2, 3, -2)
196196

197+
def test_embedded_null_chars(self):
198+
# reject embedded null bytes and characters
199+
stdscr = self.stdscr
200+
for arg in ['a', b'a']:
201+
with self.subTest(arg=arg):
202+
self.assertRaises(ValueError, stdscr.addstr, 'a\0')
203+
self.assertRaises(ValueError, stdscr.addnstr, 'a\0', 1)
204+
self.assertRaises(ValueError, stdscr.insstr, 'a\0')
205+
self.assertRaises(ValueError, stdscr.insnstr, 'a\0', 1)
197206

198207
def test_module_funcs(self):
199208
"Test module-level functions"

Lib/test/test_grp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def test_errors(self):
5050
self.assertRaises(TypeError, grp.getgrgid)
5151
self.assertRaises(TypeError, grp.getgrnam)
5252
self.assertRaises(TypeError, grp.getgrall, 42)
53+
# embedded null character
54+
self.assertRaises(ValueError, grp.getgrnam, 'a\x00b')
5355

5456
# try to get some errors
5557
bynames = {}

Lib/test/test_imp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,10 @@ def test_multiple_calls_to_get_data(self):
314314
loader.get_data(imp.__file__) # File should be closed
315315
loader.get_data(imp.__file__) # Will need to create a newly opened file
316316

317+
def test_load_source(self):
318+
with self.assertRaisesRegex(ValueError, 'embedded null'):
319+
imp.load_source(__name__, __file__ + "\0")
320+
317321

318322
class ReloadTests(unittest.TestCase):
319323

Lib/test/test_locale.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,9 +346,14 @@ def test_strcoll(self):
346346
self.assertLess(locale.strcoll('a', 'b'), 0)
347347
self.assertEqual(locale.strcoll('a', 'a'), 0)
348348
self.assertGreater(locale.strcoll('b', 'a'), 0)
349+
# embedded null character
350+
self.assertRaises(ValueError, locale.strcoll, 'a\0', 'a')
351+
self.assertRaises(ValueError, locale.strcoll, 'a', 'a\0')
349352

350353
def test_strxfrm(self):
351354
self.assertLess(locale.strxfrm('a'), locale.strxfrm('b'))
355+
# embedded null character
356+
self.assertRaises(ValueError, locale.strxfrm, 'a\0')
352357

353358

354359
class TestEnUSCollation(BaseLocalizedTest, TestCollation):

Lib/test/test_time.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ def test_strftime(self):
126126
except ValueError:
127127
self.fail('conversion specifier: %r failed.' % format)
128128

129+
self.assertRaises(TypeError, time.strftime, b'%S', tt)
130+
# embedded null character
131+
self.assertRaises(ValueError, time.strftime, '%S\0', tt)
132+
129133
def _bounds_checking(self, func):
130134
# Make sure that strftime() checks the bounds of the various parts
131135
# of the time tuple (0 is valid for *all* values).

Lib/test/test_winsound.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ def test_errors(self):
9696
self.assertRaises(TypeError, winsound.PlaySound, "bad",
9797
winsound.SND_MEMORY)
9898
self.assertRaises(TypeError, winsound.PlaySound, 1, 0)
99+
# embedded null character
100+
self.assertRaises(ValueError, winsound.PlaySound, 'bad\0', 0)
99101

100102
def test_keyword_args(self):
101103
safe_PlaySound(flags=winsound.SND_ALIAS, sound="SystemExit")

Modules/_ctypes/callproc.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,10 +1253,11 @@ static PyObject *load_library(PyObject *self, PyObject *args)
12531253
PyObject *nameobj;
12541254
PyObject *ignored;
12551255
HMODULE hMod;
1256-
if (!PyArg_ParseTuple(args, "O|O:LoadLibrary", &nameobj, &ignored))
1256+
1257+
if (!PyArg_ParseTuple(args, "U|O:LoadLibrary", &nameobj, &ignored))
12571258
return NULL;
12581259

1259-
name = PyUnicode_AsUnicode(nameobj);
1260+
name = _PyUnicode_AsUnicode(nameobj);
12601261
if (!name)
12611262
return NULL;
12621263

Modules/_cursesmodule.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,9 +341,11 @@ static int
341341
PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj,
342342
PyObject **bytes, wchar_t **wstr)
343343
{
344+
char *str;
344345
if (PyUnicode_Check(obj)) {
345346
#ifdef HAVE_NCURSESW
346347
assert (wstr != NULL);
348+
347349
*wstr = PyUnicode_AsWideCharString(obj, NULL);
348350
if (*wstr == NULL)
349351
return 0;
@@ -353,12 +355,20 @@ PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj,
353355
*bytes = PyUnicode_AsEncodedString(obj, win->encoding, NULL);
354356
if (*bytes == NULL)
355357
return 0;
358+
/* check for embedded null bytes */
359+
if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) {
360+
return 0;
361+
}
356362
return 1;
357363
#endif
358364
}
359365
else if (PyBytes_Check(obj)) {
360366
Py_INCREF(obj);
361367
*bytes = obj;
368+
/* check for embedded null bytes */
369+
if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) {
370+
return 0;
371+
}
362372
return 1;
363373
}
364374

Modules/_io/fileio.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
271271

272272
if (fd < 0) {
273273
#ifdef MS_WINDOWS
274-
Py_ssize_t length;
275274
if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
276275
return -1;
277276
}
278-
widename = PyUnicode_AsUnicodeAndSize(stringobj, &length);
277+
widename = PyUnicode_AsUnicode(stringobj);
279278
if (widename == NULL)
280279
return -1;
281280
#else

Modules/_localemodule.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,11 @@ PyLocale_strxfrm(PyObject* self, PyObject* args)
252252
s = PyUnicode_AsWideCharString(str, &n1);
253253
if (s == NULL)
254254
goto exit;
255+
if (wcslen(s) != (size_t)n1) {
256+
PyErr_SetString(PyExc_ValueError,
257+
"embedded null character");
258+
goto exit;
259+
}
255260

256261
/* assume no change in size, first */
257262
n1 = n1 + 1;

Modules/grpmodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ grp_getgrnam_impl(PyObject *module, PyObject *name)
151151

152152
if ((bytes = PyUnicode_EncodeFSDefault(name)) == NULL)
153153
return NULL;
154+
/* check for embedded null bytes */
154155
if (PyBytes_AsStringAndSize(bytes, &name_chars, NULL) == -1)
155156
goto out;
156157

Modules/nismodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ nis_match (PyObject *self, PyObject *args, PyObject *kwdict)
169169
return NULL;
170170
if ((bkey = PyUnicode_EncodeFSDefault(ukey)) == NULL)
171171
return NULL;
172+
/* check for embedded null bytes */
172173
if (PyBytes_AsStringAndSize(bkey, &key, &keylen) == -1) {
173174
Py_DECREF(bkey);
174175
return NULL;

Modules/posixmodule.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3757,7 +3757,7 @@ os__getfinalpathname_impl(PyObject *module, PyObject *path)
37573757
PyObject *result;
37583758
const wchar_t *path_wchar;
37593759

3760-
path_wchar = PyUnicode_AsUnicode(path);
3760+
path_wchar = _PyUnicode_AsUnicode(path);
37613761
if (path_wchar == NULL)
37623762
return NULL;
37633763

@@ -7209,7 +7209,7 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs)
72097209
))
72107210
return NULL;
72117211

7212-
path = PyUnicode_AsUnicode(po);
7212+
path = _PyUnicode_AsUnicode(po);
72137213
if (path == NULL)
72147214
return NULL;
72157215

@@ -9002,6 +9002,7 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value)
90029002
/*[clinic end generated code: output=d29a567d6b2327d2 input=ba586581c2e6105f]*/
90039003
{
90049004
const wchar_t *env;
9005+
Py_ssize_t size;
90059006

90069007
/* Search from index 1 because on Windows starting '=' is allowed for
90079008
defining hidden environment variables. */
@@ -9015,16 +9016,21 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value)
90159016
if (unicode == NULL) {
90169017
return NULL;
90179018
}
9018-
if (_MAX_ENV < PyUnicode_GET_LENGTH(unicode)) {
9019+
9020+
env = PyUnicode_AsUnicodeAndSize(unicode, &size);
9021+
if (env == NULL)
9022+
goto error;
9023+
if (size > _MAX_ENV) {
90199024
PyErr_Format(PyExc_ValueError,
90209025
"the environment variable is longer than %u characters",
90219026
_MAX_ENV);
90229027
goto error;
90239028
}
9024-
9025-
env = PyUnicode_AsUnicode(unicode);
9026-
if (env == NULL)
9029+
if (wcslen(env) != (size_t)size) {
9030+
PyErr_SetString(PyExc_ValueError, "embedded null character");
90279031
goto error;
9032+
}
9033+
90289034
if (_wputenv(env)) {
90299035
posix_error();
90309036
goto error;

Modules/pwdmodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ pwd_getpwnam_impl(PyObject *module, PyObject *arg)
158158

159159
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
160160
return NULL;
161+
/* check for embedded null bytes */
161162
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
162163
goto out;
163164
if ((p = getpwnam(name)) == NULL) {

Modules/spwdmodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ spwd_getspnam_impl(PyObject *module, PyObject *arg)
134134

135135
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
136136
return NULL;
137+
/* check for embedded null bytes */
137138
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
138139
goto out;
139140
if ((p = getspnam(name)) == NULL) {

Objects/unicodeobject.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4133,6 +4133,20 @@ PyUnicode_AsUnicode(PyObject *unicode)
41334133
return PyUnicode_AsUnicodeAndSize(unicode, NULL);
41344134
}
41354135

4136+
const Py_UNICODE *
4137+
_PyUnicode_AsUnicode(PyObject *unicode)
4138+
{
4139+
Py_ssize_t size;
4140+
const Py_UNICODE *wstr;
4141+
4142+
wstr = PyUnicode_AsUnicodeAndSize(unicode, &size);
4143+
if (wstr && wcslen(wstr) != (size_t)size) {
4144+
PyErr_SetString(PyExc_ValueError, "embedded null character");
4145+
return NULL;
4146+
}
4147+
return wstr;
4148+
}
4149+
41364150

41374151
Py_ssize_t
41384152
PyUnicode_GetSize(PyObject *unicode)

PC/_msi.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,8 +594,12 @@ summary_setproperty(msiobj* si, PyObject *args)
594594
return NULL;
595595

596596
if (PyUnicode_Check(data)) {
597+
WCHAR *value = _PyUnicode_AsUnicode(data);
598+
if (value == NULL) {
599+
return NULL;
600+
}
597601
status = MsiSummaryInfoSetPropertyW(si->h, field, VT_LPSTR,
598-
0, NULL, PyUnicode_AsUnicode(data));
602+
0, NULL, value);
599603
} else if (PyLong_CheckExact(data)) {
600604
long value = PyLong_AsLong(data);
601605
if (value == -1 && PyErr_Occurred()) {

Python/dynload_win.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,13 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix,
190190
{
191191
dl_funcptr p;
192192
char funcname[258], *import_python;
193-
wchar_t *wpathname;
193+
const wchar_t *wpathname;
194194

195195
#ifndef _DEBUG
196196
_Py_CheckPython3();
197197
#endif
198198

199-
wpathname = PyUnicode_AsUnicode(pathname);
199+
wpathname = _PyUnicode_AsUnicode(pathname);
200200
if (wpathname == NULL)
201201
return NULL;
202202

0 commit comments

Comments
 (0)