Skip to content

Commit 0834905

Browse files
[3.6] bpo-13617: Reject embedded null characters in wchar* strings. (GH-2302) (#2462)
Based on patch by Victor Stinner. Add private C API function _PyUnicode_AsUnicode() which is similar to PyUnicode_AsUnicode(), but checks for null characters. (cherry picked from commit f7eae0a)
1 parent 413c0a9 commit 0834905

22 files changed

+115
-24
lines changed

Include/unicodeobject.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,23 +752,27 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
752752
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
753753
#endif
754754

755+
#ifndef Py_LIMITED_API
755756
/* Return a read-only pointer to the Unicode object's internal
756757
Py_UNICODE buffer.
757758
If the wchar_t/Py_UNICODE representation is not yet available, this
758759
function will calculate it. */
759760

760-
#ifndef Py_LIMITED_API
761761
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
762762
PyObject *unicode /* Unicode object */
763763
);
764-
#endif
764+
765+
/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
766+
contains null characters. */
767+
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
768+
PyObject *unicode /* Unicode object */
769+
);
765770

766771
/* Return a read-only pointer to the Unicode object's internal
767772
Py_UNICODE buffer and save the length at size.
768773
If the wchar_t/Py_UNICODE representation is not yet available, this
769774
function will calculate it. */
770775

771-
#ifndef Py_LIMITED_API
772776
PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
773777
PyObject *unicode, /* Unicode object */
774778
Py_ssize_t *size /* location where to save the length */

Lib/ctypes/test/test_loading.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ def test_load_library(self):
6262
windll["kernel32"].GetModuleHandleW
6363
windll.LoadLibrary("kernel32").GetModuleHandleW
6464
WinDLL("kernel32").GetModuleHandleW
65+
# embedded null character
66+
self.assertRaises(ValueError, windll.LoadLibrary, "kernel32\0")
6567

6668
@unittest.skipUnless(os.name == "nt",
6769
'test specific to Windows')

Lib/test/test_builtin.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ def test_import(self):
151151
self.assertRaises(TypeError, __import__, 1, 2, 3, 4)
152152
self.assertRaises(ValueError, __import__, '')
153153
self.assertRaises(TypeError, __import__, 'sys', name='sys')
154+
# embedded null character
155+
self.assertRaises(ModuleNotFoundError, __import__, 'string\x00')
154156

155157
def test_abs(self):
156158
# int
@@ -1002,6 +1004,10 @@ def test_open(self):
10021004
self.assertEqual(fp.read(300), 'XXX'*100)
10031005
self.assertEqual(fp.read(1000), 'YYY'*100)
10041006

1007+
# embedded null bytes and characters
1008+
self.assertRaises(ValueError, open, 'a\x00b')
1009+
self.assertRaises(ValueError, open, b'a\x00b')
1010+
10051011
def test_open_default_encoding(self):
10061012
old_environ = dict(os.environ)
10071013
try:

Lib/test/test_curses.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_window_funcs(self):
8181
win2 = curses.newwin(15,15, 5,5)
8282

8383
for meth in [stdscr.addch, stdscr.addstr]:
84-
for args in [('a'), ('a', curses.A_BOLD),
84+
for args in [('a',), ('a', curses.A_BOLD),
8585
(4,4, 'a'), (5,5, 'a', curses.A_BOLD)]:
8686
with self.subTest(meth=meth.__qualname__, args=args):
8787
meth(*args)
@@ -194,6 +194,15 @@ def test_window_funcs(self):
194194
self.assertRaises(ValueError, stdscr.instr, -2)
195195
self.assertRaises(ValueError, stdscr.instr, 2, 3, -2)
196196

197+
def test_embedded_null_chars(self):
198+
# reject embedded null bytes and characters
199+
stdscr = self.stdscr
200+
for arg in ['a', b'a']:
201+
with self.subTest(arg=arg):
202+
self.assertRaises(ValueError, stdscr.addstr, 'a\0')
203+
self.assertRaises(ValueError, stdscr.addnstr, 'a\0', 1)
204+
self.assertRaises(ValueError, stdscr.insstr, 'a\0')
205+
self.assertRaises(ValueError, stdscr.insnstr, 'a\0', 1)
197206

198207
def test_module_funcs(self):
199208
"Test module-level functions"

Lib/test/test_grp.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ def test_errors(self):
5050
self.assertRaises(TypeError, grp.getgrgid)
5151
self.assertRaises(TypeError, grp.getgrnam)
5252
self.assertRaises(TypeError, grp.getgrall, 42)
53+
# embedded null character
54+
self.assertRaises(ValueError, grp.getgrnam, 'a\x00b')
5355

5456
# try to get some errors
5557
bynames = {}

Lib/test/test_imp.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,10 @@ def test_multiple_calls_to_get_data(self):
314314
loader.get_data(imp.__file__) # File should be closed
315315
loader.get_data(imp.__file__) # Will need to create a newly opened file
316316

317+
def test_load_source(self):
318+
with self.assertRaisesRegex(ValueError, 'embedded null'):
319+
imp.load_source(__name__, __file__ + "\0")
320+
317321

318322
class ReloadTests(unittest.TestCase):
319323

Lib/test/test_locale.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,14 @@ def test_strcoll(self):
339339
self.assertLess(locale.strcoll('a', 'b'), 0)
340340
self.assertEqual(locale.strcoll('a', 'a'), 0)
341341
self.assertGreater(locale.strcoll('b', 'a'), 0)
342+
# embedded null character
343+
self.assertRaises(ValueError, locale.strcoll, 'a\0', 'a')
344+
self.assertRaises(ValueError, locale.strcoll, 'a', 'a\0')
342345

343346
def test_strxfrm(self):
344347
self.assertLess(locale.strxfrm('a'), locale.strxfrm('b'))
348+
# embedded null character
349+
self.assertRaises(ValueError, locale.strxfrm, 'a\0')
345350

346351

347352
class TestEnUSCollation(BaseLocalizedTest, TestCollation):

Lib/test/test_time.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ def test_strftime(self):
126126
except ValueError:
127127
self.fail('conversion specifier: %r failed.' % format)
128128

129+
self.assertRaises(TypeError, time.strftime, b'%S', tt)
130+
# embedded null character
131+
self.assertRaises(ValueError, time.strftime, '%S\0', tt)
132+
129133
def _bounds_checking(self, func):
130134
# Make sure that strftime() checks the bounds of the various parts
131135
# of the time tuple (0 is valid for *all* values).

Lib/test/test_winsound.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def test_errors(self):
9898
self.assertRaises(TypeError, winsound.PlaySound, "bad",
9999
winsound.SND_MEMORY)
100100
self.assertRaises(TypeError, winsound.PlaySound, 1, 0)
101+
# embedded null character
102+
self.assertRaises(ValueError, winsound.PlaySound, 'bad\0', 0)
101103

102104
def test_keyword_args(self):
103105
safe_PlaySound(flags=winsound.SND_ALIAS, sound="SystemExit")

Modules/_ctypes/callproc.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,14 +1231,15 @@ The handle may be used to locate exported functions in this\n\
12311231
module.\n";
12321232
static PyObject *load_library(PyObject *self, PyObject *args)
12331233
{
1234-
WCHAR *name;
1234+
const WCHAR *name;
12351235
PyObject *nameobj;
12361236
PyObject *ignored;
12371237
HMODULE hMod;
1238-
if (!PyArg_ParseTuple(args, "O|O:LoadLibrary", &nameobj, &ignored))
1238+
1239+
if (!PyArg_ParseTuple(args, "U|O:LoadLibrary", &nameobj, &ignored))
12391240
return NULL;
12401241

1241-
name = PyUnicode_AsUnicode(nameobj);
1242+
name = _PyUnicode_AsUnicode(nameobj);
12421243
if (!name)
12431244
return NULL;
12441245

Modules/_cursesmodule.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ static int
342342
PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj,
343343
PyObject **bytes, wchar_t **wstr)
344344
{
345+
char *str;
345346
if (PyUnicode_Check(obj)) {
346347
#ifdef HAVE_NCURSESW
347348
assert (wstr != NULL);
@@ -354,12 +355,20 @@ PyCurses_ConvertToString(PyCursesWindowObject *win, PyObject *obj,
354355
*bytes = PyUnicode_AsEncodedString(obj, win->encoding, NULL);
355356
if (*bytes == NULL)
356357
return 0;
358+
/* check for embedded null bytes */
359+
if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) {
360+
return 0;
361+
}
357362
return 1;
358363
#endif
359364
}
360365
else if (PyBytes_Check(obj)) {
361366
Py_INCREF(obj);
362367
*bytes = obj;
368+
/* check for embedded null bytes */
369+
if (PyBytes_AsStringAndSize(*bytes, &str, NULL) < 0) {
370+
return 0;
371+
}
363372
return 1;
364373
}
365374

Modules/_io/fileio.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,10 @@ _io_FileIO___init___impl(fileio *self, PyObject *nameobj, const char *mode,
280280

281281
if (fd < 0) {
282282
#ifdef MS_WINDOWS
283-
Py_ssize_t length;
284283
if (!PyUnicode_FSDecoder(nameobj, &stringobj)) {
285284
return -1;
286285
}
287-
widename = PyUnicode_AsUnicodeAndSize(stringobj, &length);
286+
widename = PyUnicode_AsUnicode(stringobj);
288287
if (widename == NULL)
289288
return -1;
290289
#else

Modules/_localemodule.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,11 @@ PyLocale_strxfrm(PyObject* self, PyObject* args)
252252
s = PyUnicode_AsWideCharString(str, &n1);
253253
if (s == NULL)
254254
goto exit;
255+
if (wcslen(s) != (size_t)n1) {
256+
PyErr_SetString(PyExc_ValueError,
257+
"embedded null character");
258+
goto exit;
259+
}
255260

256261
/* assume no change in size, first */
257262
n1 = n1 + 1;

Modules/grpmodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ grp_getgrnam_impl(PyObject *module, PyObject *name)
151151

152152
if ((bytes = PyUnicode_EncodeFSDefault(name)) == NULL)
153153
return NULL;
154+
/* check for embedded null bytes */
154155
if (PyBytes_AsStringAndSize(bytes, &name_chars, NULL) == -1)
155156
goto out;
156157

Modules/nismodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ nis_match (PyObject *self, PyObject *args, PyObject *kwdict)
169169
return NULL;
170170
if ((bkey = PyUnicode_EncodeFSDefault(ukey)) == NULL)
171171
return NULL;
172+
/* check for embedded null bytes */
172173
if (PyBytes_AsStringAndSize(bkey, &key, &keylen) == -1) {
173174
Py_DECREF(bkey);
174175
return NULL;

Modules/posixmodule.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3680,7 +3680,7 @@ os__getfinalpathname_impl(PyObject *module, PyObject *path)
36803680
PyObject *result;
36813681
const wchar_t *path_wchar;
36823682

3683-
path_wchar = PyUnicode_AsUnicode(path);
3683+
path_wchar = _PyUnicode_AsUnicode(path);
36843684
if (path_wchar == NULL)
36853685
return NULL;
36863686

@@ -7088,7 +7088,7 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs)
70887088
))
70897089
return NULL;
70907090

7091-
path = PyUnicode_AsUnicode(po);
7091+
path = _PyUnicode_AsUnicode(po);
70927092
if (path == NULL)
70937093
return NULL;
70947094

@@ -8881,6 +8881,7 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value)
88818881
/*[clinic end generated code: output=d29a567d6b2327d2 input=ba586581c2e6105f]*/
88828882
{
88838883
const wchar_t *env;
8884+
Py_ssize_t size;
88848885

88858886
/* Search from index 1 because on Windows starting '=' is allowed for
88868887
defining hidden environment variables. */
@@ -8894,16 +8895,21 @@ os_putenv_impl(PyObject *module, PyObject *name, PyObject *value)
88948895
if (unicode == NULL) {
88958896
return NULL;
88968897
}
8897-
if (_MAX_ENV < PyUnicode_GET_LENGTH(unicode)) {
8898+
8899+
env = PyUnicode_AsUnicodeAndSize(unicode, &size);
8900+
if (env == NULL)
8901+
goto error;
8902+
if (size > _MAX_ENV) {
88988903
PyErr_Format(PyExc_ValueError,
88998904
"the environment variable is longer than %u characters",
89008905
_MAX_ENV);
89018906
goto error;
89028907
}
8903-
8904-
env = PyUnicode_AsUnicode(unicode);
8905-
if (env == NULL)
8908+
if (wcslen(env) != (size_t)size) {
8909+
PyErr_SetString(PyExc_ValueError, "embedded null character");
89068910
goto error;
8911+
}
8912+
89078913
if (_wputenv(env)) {
89088914
posix_error();
89098915
goto error;

Modules/pwdmodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ pwd_getpwnam_impl(PyObject *module, PyObject *arg)
158158

159159
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
160160
return NULL;
161+
/* check for embedded null bytes */
161162
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
162163
goto out;
163164
if ((p = getpwnam(name)) == NULL) {

Modules/spwdmodule.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ spwd_getspnam_impl(PyObject *module, PyObject *arg)
134134

135135
if ((bytes = PyUnicode_EncodeFSDefault(arg)) == NULL)
136136
return NULL;
137+
/* check for embedded null bytes */
137138
if (PyBytes_AsStringAndSize(bytes, &name, NULL) == -1)
138139
goto out;
139140
if ((p = getspnam(name)) == NULL) {

Objects/unicodeobject.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4164,6 +4164,20 @@ PyUnicode_AsUnicode(PyObject *unicode)
41644164
return PyUnicode_AsUnicodeAndSize(unicode, NULL);
41654165
}
41664166

4167+
const Py_UNICODE *
4168+
_PyUnicode_AsUnicode(PyObject *unicode)
4169+
{
4170+
Py_ssize_t size;
4171+
const Py_UNICODE *wstr;
4172+
4173+
wstr = PyUnicode_AsUnicodeAndSize(unicode, &size);
4174+
if (wstr && wcslen(wstr) != (size_t)size) {
4175+
PyErr_SetString(PyExc_ValueError, "embedded null character");
4176+
return NULL;
4177+
}
4178+
return wstr;
4179+
}
4180+
41674181

41684182
Py_ssize_t
41694183
PyUnicode_GetSize(PyObject *unicode)

PC/_msi.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,8 +600,12 @@ summary_setproperty(msiobj* si, PyObject *args)
600600
return NULL;
601601

602602
if (PyUnicode_Check(data)) {
603+
const WCHAR *value = _PyUnicode_AsUnicode(data);
604+
if (value == NULL) {
605+
return NULL;
606+
}
603607
status = MsiSummaryInfoSetPropertyW(si->h, field, VT_LPSTR,
604-
0, NULL, PyUnicode_AsUnicode(data));
608+
0, NULL, value);
605609
} else if (PyLong_CheckExact(data)) {
606610
long value = PyLong_AsLong(data);
607611
if (value == -1 && PyErr_Occurred()) {

Python/dynload_win.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,13 @@ dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix,
190190
{
191191
dl_funcptr p;
192192
char funcname[258], *import_python;
193-
wchar_t *wpathname;
193+
const wchar_t *wpathname;
194194

195195
#ifndef _DEBUG
196196
_Py_CheckPython3();
197197
#endif
198198

199-
wpathname = PyUnicode_AsUnicode(pathname);
199+
wpathname = _PyUnicode_AsUnicode(pathname);
200200
if (wpathname == NULL)
201201
return NULL;
202202

0 commit comments

Comments
 (0)