From 6624420cc0fbd7e0f0470a6a31e4717d08e0db11 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sat, 10 Aug 2024 12:44:08 -0700 Subject: [PATCH 1/4] gh-122888: Fix crash on certain calls to str() Fixes #122888 --- Lib/test/test_str.py | 10 ++++++++++ .../2024-08-10-12-44-03.gh-issue-122888.TUyu9r.rst | 2 ++ Objects/unicodeobject.c | 11 ++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-08-10-12-44-03.gh-issue-122888.TUyu9r.rst diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 7bdd2881904548..3467dd86797b99 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1747,6 +1747,16 @@ def test_constructor_keyword_args(self): self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'" self.assertEqual(str(object=b'foo', errors='strict'), 'foo') + def test_constructor_errors(self): + with self.assertRaises(TypeError): + str(b"x", b"ascii") + with self.assertRaises(TypeError): + str(b"x", encoding=b"ascii") + with self.assertRaises(TypeError): + str(b"x", "ascii", b"strict") + with self.assertRaises(TypeError): + str(b"x", "ascii", errors=b"strict") + def test_constructor_defaults(self): """Check the constructor argument defaults.""" # The object argument defaults to '' or b''. diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-08-10-12-44-03.gh-issue-122888.TUyu9r.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-10-12-44-03.gh-issue-122888.TUyu9r.rst new file mode 100644 index 00000000000000..93171360df0dda --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-08-10-12-44-03.gh-issue-122888.TUyu9r.rst @@ -0,0 +1,2 @@ +Fix crash on certain calls to ``str()`` with positional arguments of the +wrong type. Patch by Jelle Zijlstra. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 12578812a762f6..da9c5857cc3bee 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15121,7 +15121,16 @@ unicode_vectorcall(PyObject *type, PyObject *const *args, return PyObject_Str(object); } const char *encoding = arg_as_utf8(args[1], "encoding"); - const char *errors = (nargs == 3) ? arg_as_utf8(args[2], "errors") : NULL; + if (encoding == NULL) { + return NULL; + } + const char *errors = NULL; + if (nargs == 3) { + errors = arg_as_utf8(args[2], "errors"); + if (errors == NULL) { + return NULL; + } + } return PyUnicode_FromEncodedObject(object, encoding, errors); } From e2d9bf7da80fe2bc7310df654f27d961ddca40ed Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sat, 10 Aug 2024 12:53:45 -0700 Subject: [PATCH 2/4] Improve tests --- Lib/test/test_str.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 3467dd86797b99..b14016d2b1406d 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1748,14 +1748,16 @@ def test_constructor_keyword_args(self): self.assertEqual(str(object=b'foo', errors='strict'), 'foo') def test_constructor_errors(self): - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): str(b"x", b"ascii") - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): str(b"x", encoding=b"ascii") - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): str(b"x", "ascii", b"strict") - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): str(b"x", "ascii", errors=b"strict") + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str("too", "many", "argu", "ments") def test_constructor_defaults(self): """Check the constructor argument defaults.""" From 6456846b4c2186c3bdc4b1461d111fb16abda088 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 11 Aug 2024 16:06:37 -0700 Subject: [PATCH 3/4] unify and extend tests --- Lib/test/test_str.py | 51 +++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index b14016d2b1406d..605e2c7fcfc851 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1747,18 +1747,6 @@ def test_constructor_keyword_args(self): self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'" self.assertEqual(str(object=b'foo', errors='strict'), 'foo') - def test_constructor_errors(self): - with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): - str(b"x", b"ascii") - with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): - str(b"x", encoding=b"ascii") - with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): - str(b"x", "ascii", b"strict") - with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): - str(b"x", "ascii", errors=b"strict") - with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): - str("too", "many", "argu", "ments") - def test_constructor_defaults(self): """Check the constructor argument defaults.""" # The object argument defaults to '' or b''. @@ -2667,19 +2655,44 @@ def test_str_invalid_call(self): check = lambda *a, **kw: self.assertRaises(TypeError, str, *a, **kw) # too many args - check(1, "", "", 1) + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str("too", "many", "argu", "ments") + with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): + str(1, "", "", 1) # no such kw arg - check(test=1) + with self.assertRaisesRegex(TypeError, r"str\(\) got an unexpected keyword argument 'test'"): + str(test=1) # 'encoding' must be str - check(1, encoding=1) - check(1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, encoding=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", b"ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not bytes"): + str(b"x", encoding=b"ascii") # 'errors' must be str - check(1, errors=1) - check(1, "", errors=1) - check(1, 1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'encoding' must be str, not int"): + str(1, 1, 1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not int"): + str(1, "", errors=1) + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", b"strict") + with self.assertRaisesRegex(TypeError, r"str\(\) argument 'errors' must be str, not bytes"): + str(b"x", "ascii", errors=b"strict") + + # both positional and kwarg + with self.assertRaisesRegex(TypeError, r"argument for str\(\) given by name \('encoding'\) and position \(2\)"): + str(b"x", "utf-8", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "ignore", encoding="ascii") + with self.assertRaisesRegex(TypeError, r"str\(\) takes at most 3 arguments \(4 given\)"): + str(b"x", "utf-8", "strict", errors="ignore") class StringModuleTest(unittest.TestCase): From bfff977657c3e5efb5870572a136f55ea9851183 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Mon, 12 Aug 2024 07:50:43 -0700 Subject: [PATCH 4/4] clean up test --- Lib/test/test_str.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index 605e2c7fcfc851..b9ab13711db8c9 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1736,8 +1736,6 @@ def __str__(self): 'character buffers are decoded to unicode' ) - self.assertRaises(TypeError, str, 42, 42, 42) - def test_constructor_keyword_args(self): """Pass various keyword argument combinations to the constructor.""" # The object argument can be passed as a keyword. @@ -2652,8 +2650,6 @@ def test_check_encoding_errors(self): self.assertEqual(proc.rc, 10, proc) def test_str_invalid_call(self): - check = lambda *a, **kw: self.assertRaises(TypeError, str, *a, **kw) - # too many args with self.assertRaisesRegex(TypeError, r"str expected at most 3 arguments, got 4"): str("too", "many", "argu", "ments")