From fea2ac97efdbbe6cbeb2b9d0730061e99d525ebb Mon Sep 17 00:00:00 2001 From: Sidney Markowitz Date: Thu, 7 Jul 2022 17:07:00 +1200 Subject: [PATCH 1/7] gh-94606: Fix error when message has Unicode surrogate but is not valid surrogateescaped string --- Lib/email/message.py | 2 +- Lib/email/utils.py | 18 +++++++++++++-- Lib/test/test_email/test_message.py | 36 +++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index 65fda507251ce3..d14fd92b8986d7 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -290,7 +290,7 @@ def get_payload(self, i=None, decode=False): cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. if isinstance(payload, str): - if utils._has_surrogates(payload): + if utils._has_decoded_with_surrogateescape(payload): bpayload = payload.encode('ascii', 'surrogateescape') if not decode: try: diff --git a/Lib/email/utils.py b/Lib/email/utils.py index cfdfeb3f1a86e4..ab2b4f849822dc 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -49,16 +49,30 @@ escapesre = re.compile(r'[\\"]') def _has_surrogates(s): - """Return True if s contains surrogate-escaped binary data.""" + """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 # (Python's default encoding) can encode any string. This is the fastest - # way to check for surrogates, see issue 11454 for timings. + # way to check for surrogates, see issue 11454 (moved to GH 55663) for timings. + # This will pass some strings that are not valid for surrogateescape encoding. 
try: s.encode() return False except UnicodeEncodeError: return True +def _has_decoded_with_surrogateescape(s): + """Return True if s is a valid str decoded using surrogateescape""" + # Slower test than _has_surrogates to be used when the string must + # be encodable with surrogateescape, but is no slower if the string + # does not have any unicode surrogate characters. + if _has_surrogates(s): + try: + s.encode('ascii', 'surrogateescape') + except UnicodeEncodeError: + return False + return True + return False + # How to deal with a string containing bytes before handing it to the # application through the 'normal' interface. def _sanitize(string): diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 4c754bf40fc300..3d1d526476e6dd 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -954,6 +954,42 @@ def test_get_body_malformed(self): # AttributeError: 'str' object has no attribute 'is_attachment' m.get_body() + def test_get_payload_unicode_surrogate1(self): + """test that fix for GH issue 94606 does not break this""" + msg = "String that could have been decod\udcc3\udcabd with surrogateescape" + expected = b'String that could have been decod\xc3\xabd with surrogateescape' + m = self._str_msg(msg) + payload = m.get_payload(decode=True) + self.assertEqual(expected, payload) + + def test_get_payload_unicode_surrogate2(self): + """test that fix for GH issue 94606 does not break this""" + msg = "Unicode string with a utf-8 charactër" + expected = b'Unicode string with a utf-8 charact\xebr' + m = self._str_msg(msg) + payload = m.get_payload(decode=True) + self.assertEqual(expected, payload) + + def test_get_payload_unicode_surrogate3(self): + """test for GH issue 94606""" + msg = "String that could not have been dëcod\udcc3\udcabd with surrogateescape" + expected = b'String that could not have been d\xebcod\\udcc3\udcabd with surrogateescape' + m = self._str_msg(msg) + # In GH issue 94606, 
this would raise + # UnicodeEncodeError: 'ascii' codec can't encode character '\xeb' in position 33: ordinal not in range(128) + payload = m.get_payload(decode=True) + self.assertEqual(expected, payload) + + def test_get_payload_unicode_surrogate4(self): + """test for GH issue 94606""" + msg = "Different reason \udfff could not have been decoded with surrogateescape" + expected = b'Different reason \\udfff could not have been decoded with surrogateescape' + m = self._str_msg(msg) + # In GH issue 94606, this would raise + # UnicodeEncodeError: 'ascii' codec can't encode character '\udfff' in position 17: ordinal not in range(128) + payload = m.get_payload(decode=True) + self.assertEqual(expected, payload) + class TestMIMEPart(TestEmailMessageBase, TestEmailBase): # Doing the full test run here may seem a bit redundant, since the two From 71a500d11428e146e4c8a4a55339c291f92321fa Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 7 Jul 2022 05:37:54 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2022-07-07-05-37-53.gh-issue-94606.hojJ54.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-07-05-37-53.gh-issue-94606.hojJ54.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-07-05-37-53.gh-issue-94606.hojJ54.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-07-05-37-53.gh-issue-94606.hojJ54.rst new file mode 100644 index 00000000000000..5201ab7d842088 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-07-05-37-53.gh-issue-94606.hojJ54.rst @@ -0,0 +1,3 @@ +Fix UnicodeEncodeError when :meth:`email.message.Message.get_payload` reads a message +with a Unicode surrogate character and the message content is not well-formed for +surrogateescape encoding. Patch by Sidney Markowitz. 
From 9ca0fb838aad39f3c076c2c57f7bf8b51a74c6b5 Mon Sep 17 00:00:00 2001 From: Erlend Egeberg Aasland Date: Thu, 7 Jul 2022 01:13:30 +0200 Subject: [PATCH 3/7] gh-94630: Update sqlite3 docs with positional-only and keyword-only symbols (#94631) --- Doc/library/sqlite3.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index b5cd6894e8060a..d3b7d0723991bd 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -332,7 +332,7 @@ Module functions and constants Added the ``sqlite3.connect/handle`` auditing event. -.. function:: register_converter(typename, converter) +.. function:: register_converter(typename, converter, /) Register the *converter* callable to convert SQLite objects of type *typename* into a Python object of a specific type. @@ -346,7 +346,7 @@ Module functions and constants case-insensitively. -.. function:: register_adapter(type, adapter) +.. function:: register_adapter(type, adapter, /) Register an *adapter* callable to adapt the Python type *type* into an SQLite type. @@ -368,7 +368,7 @@ Module functions and constants .. literalinclude:: ../includes/sqlite3/complete_statement.py -.. function:: enable_callback_tracebacks(flag) +.. function:: enable_callback_tracebacks(flag, /) By default you will not get any tracebacks in user-defined functions, aggregates, converters, authorizer callbacks etc. If you want to debug them, @@ -508,7 +508,7 @@ Connection Objects .. literalinclude:: ../includes/sqlite3/md5func.py - .. method:: create_aggregate(name, n_arg, aggregate_class) + .. method:: create_aggregate(name, /, n_arg, aggregate_class) Creates a user-defined aggregate function. @@ -647,7 +647,7 @@ Connection Objects .. versionadded:: 3.3 - .. method:: enable_load_extension(enabled) + .. method:: enable_load_extension(enabled, /) This routine allows/disallows the SQLite engine to load SQLite extensions from shared libraries. 
SQLite extensions can define new functions, @@ -665,7 +665,7 @@ Connection Objects .. literalinclude:: ../includes/sqlite3/load_extension.py - .. method:: load_extension(path) + .. method:: load_extension(path, /) This routine loads an SQLite extension from a shared library. You have to enable extension loading with :meth:`enable_load_extension` before you can @@ -876,7 +876,7 @@ Cursor Objects .. index:: single: ? (question mark); in SQL statements .. index:: single: : (colon); in SQL statements - .. method:: execute(sql[, parameters]) + .. method:: execute(sql, parameters=(), /) Execute an SQL statement. Values may be bound to the statement using :ref:`placeholders `. @@ -892,7 +892,7 @@ Cursor Objects a transaction is implicitly opened before executing *sql*. - .. method:: executemany(sql, seq_of_parameters) + .. method:: executemany(sql, seq_of_parameters, /) Execute a :ref:`parameterized ` SQL command against all parameter sequences or mappings found in the sequence @@ -907,7 +907,7 @@ Cursor Objects .. literalinclude:: ../includes/sqlite3/executemany_2.py - .. method:: executescript(sql_script) + .. method:: executescript(sql_script, /) Execute multiple SQL statements at once. If there is a pending transaciton, @@ -957,11 +957,11 @@ Cursor Objects The cursor will be unusable from this point forward; a :exc:`ProgrammingError` exception will be raised if any operation is attempted with the cursor. - .. method:: setinputsizes(sizes) + .. method:: setinputsizes(sizes, /) Required by the DB-API. Does nothing in :mod:`sqlite3`. - .. method:: setoutputsize(size [, column]) + .. method:: setoutputsize(size, column=None, /) Required by the DB-API. Does nothing in :mod:`sqlite3`. 
From ec63ab5e1efa10eb84917b244c46eab7de6fcea1 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 7 Jul 2022 07:38:36 +0100 Subject: [PATCH 4/7] gh-92228: disable the compiler's 'small exit block inlining' optimization for blocks that have a line number (GH-94592) Inlining of code that corresponds to source code lines, can make it hard to distinguish later between code which is only reachable from except handlers, and that which is reachable in normal control flow. This caused problems with the debugger's jump feature. This PR turns off the inlining optimisation for code which has line numbers. We still inline things like the implicit "return None". --- Lib/test/test_dis.py | 27 +++++-------------- Lib/test/test_peepholer.py | 2 ++ Lib/test/test_sys_settrace.py | 9 +++++++ ...2-07-06-14-02-26.gh-issue-92228.44Cbly.rst | 1 + Python/compile.c | 14 ++++++++++ 5 files changed, 33 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-06-14-02-26.gh-issue-92228.44Cbly.rst diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 256004855d4a6e..440e4ed5eacfb0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -360,13 +360,13 @@ def bug42562(): --> BINARY_OP 11 (/) POP_TOP -%3d LOAD_FAST_CHECK 1 (tb) +%3d >> LOAD_FAST_CHECK 1 (tb) RETURN_VALUE >> PUSH_EXC_INFO %3d LOAD_GLOBAL 0 (Exception) CHECK_EXC_MATCH - POP_JUMP_FORWARD_IF_FALSE 23 (to 82) + POP_JUMP_FORWARD_IF_FALSE 22 (to 80) STORE_FAST 0 (e) %3d LOAD_FAST 0 (e) @@ -376,9 +376,7 @@ def bug42562(): LOAD_CONST 0 (None) STORE_FAST 0 (e) DELETE_FAST 0 (e) - -%3d LOAD_FAST 1 (tb) - RETURN_VALUE + JUMP_BACKWARD 29 (to 14) >> LOAD_CONST 0 (None) STORE_FAST 0 (e) DELETE_FAST 0 (e) @@ -396,7 +394,6 @@ def bug42562(): TRACEBACK_CODE.co_firstlineno + 5, TRACEBACK_CODE.co_firstlineno + 3, TRACEBACK_CODE.co_firstlineno + 4, - TRACEBACK_CODE.co_firstlineno + 5, TRACEBACK_CODE.co_firstlineno + 3) def _fstring(a, b, c, d): @@ 
-443,7 +440,7 @@ def _with(c): CALL 2 POP_TOP -%3d LOAD_CONST 2 (2) +%3d >> LOAD_CONST 2 (2) STORE_FAST 2 (y) LOAD_CONST 0 (None) RETURN_VALUE @@ -456,11 +453,7 @@ def _with(c): POP_EXCEPT POP_TOP POP_TOP - -%3d LOAD_CONST 2 (2) - STORE_FAST 2 (y) - LOAD_CONST 0 (None) - RETURN_VALUE + JUMP_BACKWARD 13 (to 30) >> COPY 3 POP_EXCEPT RERAISE 1 @@ -472,7 +465,6 @@ def _with(c): _with.__code__.co_firstlineno + 1, _with.__code__.co_firstlineno + 3, _with.__code__.co_firstlineno + 1, - _with.__code__.co_firstlineno + 3, ) async def _asyncwith(c): @@ -510,7 +502,7 @@ async def _asyncwith(c): JUMP_BACKWARD_NO_INTERRUPT 4 (to 48) >> POP_TOP -%3d LOAD_CONST 2 (2) +%3d >> LOAD_CONST 2 (2) STORE_FAST 2 (y) LOAD_CONST 0 (None) RETURN_VALUE @@ -529,11 +521,7 @@ async def _asyncwith(c): POP_EXCEPT POP_TOP POP_TOP - -%3d LOAD_CONST 2 (2) - STORE_FAST 2 (y) - LOAD_CONST 0 (None) - RETURN_VALUE + JUMP_BACKWARD 19 (to 58) >> COPY 3 POP_EXCEPT RERAISE 1 @@ -545,7 +533,6 @@ async def _asyncwith(c): _asyncwith.__code__.co_firstlineno + 1, _asyncwith.__code__.co_firstlineno + 3, _asyncwith.__code__.co_firstlineno + 1, - _asyncwith.__code__.co_firstlineno + 3, ) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 2c3b1ab65a8f9d..e03c42c2f823dc 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -345,6 +345,8 @@ def f(x): self.assertEqual(len(returns), 1) self.check_lnotab(f) + @unittest.skip("Following gh-92228 the return has two predecessors " + "and that prevents jump elimination.") def test_elim_jump_to_return(self): # JUMP_FORWARD to RETURN --> RETURN def f(cond, true_value, false_value): diff --git a/Lib/test/test_sys_settrace.py b/Lib/test/test_sys_settrace.py index 7ec290dbf04ad5..f03b03e19a2528 100644 --- a/Lib/test/test_sys_settrace.py +++ b/Lib/test/test_sys_settrace.py @@ -2042,6 +2042,15 @@ def test_no_jump_within_except_block(output): output.append(6) output.append(7) + @jump_test(6, 1, [1, 5, 1, 5]) + def 
test_jump_over_try_except(output): + output.append(1) + try: + 1 / 0 + except ZeroDivisionError as e: + output.append(5) + x = 42 # has to be a two-instruction block + @jump_test(2, 4, [1, 4, 5, -4]) def test_jump_across_with(output): output.append(1) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-06-14-02-26.gh-issue-92228.44Cbly.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-06-14-02-26.gh-issue-92228.44Cbly.rst new file mode 100644 index 00000000000000..458ad897cefcb6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-06-14-02-26.gh-issue-92228.44Cbly.rst @@ -0,0 +1 @@ +Disable the compiler's inline-small-exit-blocks optimization for exit blocks that are associated with source code lines. This fixes a bug where the debugger cannot tell where an exception handler ends and the following code block begins. diff --git a/Python/compile.c b/Python/compile.c index 77176893f60c5c..f36a6e85a54c20 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -9224,6 +9224,16 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) return -1; } +static bool +basicblock_has_lineno(const basicblock *bb) { + for (int i = 0; i < bb->b_iused; i++) { + if (bb->b_instr[i].i_loc.lineno > 0) { + return true; + } + } + return false; +} + /* If this block ends with an unconditional jump to an exit block, * then remove the jump and extend this block with the target. 
*/ @@ -9240,6 +9250,10 @@ extend_block(basicblock *bb) { } if (basicblock_exits_scope(last->i_target) && last->i_target->b_iused <= MAX_COPY_SIZE) { basicblock *to_copy = last->i_target; + if (basicblock_has_lineno(to_copy)) { + /* copy only blocks without line number (like implicit 'return None's) */ + return 0; + } last->i_opcode = NOP; for (int i = 0; i < to_copy->b_iused; i++) { int index = basicblock_next_instr(bb); From f9ff8d23b1dfa706d41bb5ecefd55c87c3ec352f Mon Sep 17 00:00:00 2001 From: Erlend Egeberg Aasland Date: Thu, 7 Jul 2022 09:00:20 +0200 Subject: [PATCH 5/7] gh-94628: Add explicit parameter list to sqlite3.connect docs (#94629) Co-authored-by: CAM Gerlach --- Doc/library/sqlite3.rst | 181 +++++++++++++++++++++++----------------- 1 file changed, 105 insertions(+), 76 deletions(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index d3b7d0723991bd..ad49a217d3b4a6 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -246,90 +246,89 @@ Module functions and constants (bitwise or) operator. -.. function:: connect(database[, timeout, detect_types, isolation_level, check_same_thread, factory, cached_statements, uri]) - - Opens a connection to the SQLite database file *database*. By default returns a - :class:`Connection` object, unless a custom *factory* is given. - - *database* is a :term:`path-like object` giving the pathname (absolute or - relative to the current working directory) of the database file to be opened. - You can use ``":memory:"`` to open a database connection to a database that - resides in RAM instead of on disk. - - When a database is accessed by multiple connections, and one of the processes - modifies the database, the SQLite database is locked until that transaction is - committed. The *timeout* parameter specifies how long the connection should wait - for the lock to go away until raising an exception. The default for the timeout - parameter is 5.0 (five seconds). 
- - For the *isolation_level* parameter, please see the - :attr:`~Connection.isolation_level` property of :class:`Connection` objects. - - SQLite natively supports only the types TEXT, INTEGER, REAL, BLOB and NULL. If - you want to use other types you must add support for them yourself. The - *detect_types* parameter and using custom **converters** registered with the - module-level :func:`register_converter` function allow you to easily do that. - - *detect_types* defaults to 0 (type detection disabled). - Set it to any combination (using ``|``, bitwise or) of - :const:`PARSE_DECLTYPES` and :const:`PARSE_COLNAMES` - to enable type detection. - Column names takes precedence over declared types if both flags are set. - Types cannot be detected for generated fields (for example ``max(data)``), - even when the *detect_types* parameter is set. - In such cases, the returned type is :class:`str`. - - By default, *check_same_thread* is :const:`True` and only the creating thread may - use the connection. If set :const:`False`, the returned connection may be shared - across multiple threads. When using multiple threads with the same connection - writing operations should be serialized by the user to avoid data corruption. - - By default, the :mod:`sqlite3` module uses its :class:`Connection` class for the - connect call. You can, however, subclass the :class:`Connection` class and make - :func:`connect` use your class instead by providing your class for the *factory* - parameter. - - Consult the section :ref:`sqlite3-types` of this manual for details. - - The :mod:`sqlite3` module internally uses a statement cache to avoid SQL parsing - overhead. If you want to explicitly set the number of statements that are cached - for the connection, you can set the *cached_statements* parameter. The currently - implemented default is to cache 128 statements. 
- - If *uri* is :const:`True`, *database* is interpreted as a - :abbr:`URI (Uniform Resource Identifier)` with a file path and an optional - query string. The scheme part *must* be ``"file:"``. The path can be a - relative or absolute file path. The query string allows us to pass - parameters to SQLite. Some useful URI tricks include:: - - # Open a database in read-only mode. - con = sqlite3.connect("file:template.db?mode=ro", uri=True) - - # Don't implicitly create a new database file if it does not already exist. - # Will raise sqlite3.OperationalError if unable to open a database file. - con = sqlite3.connect("file:nosuchdb.db?mode=rw", uri=True) - - # Create a shared named in-memory database. - con1 = sqlite3.connect("file:mem1?mode=memory&cache=shared", uri=True) - con2 = sqlite3.connect("file:mem1?mode=memory&cache=shared", uri=True) - con1.executescript("create table t(t); insert into t values(28);") - rows = con2.execute("select * from t").fetchall() - - More information about this feature, including a list of recognized - parameters, can be found in the - `SQLite URI documentation <https://www.sqlite.org/uri.html>`_. + +.. function:: connect(database, timeout=5.0, detect_types=0, isolation_level="DEFERRED", check_same_thread=True, factory=sqlite3.Connection, cached_statements=128, uri=False) + + Open a connection to an SQLite database. + + :param database: + The path to the database file to be opened. + Pass ``":memory:"`` to open a connection to a database that is + in RAM instead of on disk. + :type database: :term:`path-like object` + + :param timeout: + How many seconds the connection should wait before raising + an exception, if the database is locked by another connection. + If another connection opens a transaction to modify the database, + it will be locked until that transaction is committed. + Default five seconds. 
+ :type timeout: float + + :param detect_types: + Control whether and how data types not + :ref:`natively supported by SQLite <sqlite3-types>` + are looked up to be converted to Python types, + using the converters registered with :func:`register_converter`. + Set it to any combination (using ``|``, bitwise or) of + :const:`PARSE_DECLTYPES` and :const:`PARSE_COLNAMES` + to enable this. + Column names take precedence over declared types if both flags are set. + Types cannot be detected for generated fields (for example ``max(data)``), + even when the *detect_types* parameter is set; :class:`str` will be + returned instead. + By default (``0``), type detection is disabled. + :type detect_types: int + + :param isolation_level: + The :attr:`~Connection.isolation_level` of the connection, + controlling whether and how transactions are implicitly opened. + Can be ``"DEFERRED"`` (default), ``"EXCLUSIVE"`` or ``"IMMEDIATE"``; + or :const:`None` to disable opening transactions implicitly. + See :ref:`sqlite3-controlling-transactions` for more. + :type isolation_level: str | None + + :param check_same_thread: + If :const:`True` (default), only the creating thread may use the connection. + If :const:`False`, the connection may be shared across multiple threads; + if so, write operations should be serialized by the user to avoid data + corruption. + :type check_same_thread: bool + + :param factory: + A custom subclass of :class:`Connection` to create the connection with, + if not the default :class:`Connection` class. + :type factory: :class:`Connection` + + :param cached_statements: + The number of statements that ``sqlite3`` + should internally cache for this connection, to avoid parsing overhead. + By default, 128 statements. + :type cached_statements: int + + :param uri: + If set to :const:`True`, *database* is interpreted as a + :abbr:`URI (Uniform Resource Identifier)` with a file path + and an optional query string. 
+ The scheme part *must* be ``"file:"``, + and the path can be relative or absolute. + The query string allows passing parameters to SQLite, + enabling various :ref:`sqlite3-uri-tricks`. + :type uri: bool + + :rtype: sqlite3.Connection .. audit-event:: sqlite3.connect database sqlite3.connect .. audit-event:: sqlite3.connect/handle connection_handle sqlite3.connect - .. versionchanged:: 3.4 - Added the *uri* parameter. + .. versionadded:: 3.4 + The *uri* parameter. .. versionchanged:: 3.7 *database* can now also be a :term:`path-like object`, not only a string. - .. versionchanged:: 3.10 - Added the ``sqlite3.connect/handle`` auditing event. + .. versionadded:: 3.10 + The ``sqlite3.connect/handle`` auditing event. .. function:: register_converter(typename, converter, /) @@ -1480,6 +1479,36 @@ regardless of the value of :attr:`~Connection.isolation_level`. https://www.sqlite.org/lang_transaction.html#deferred_immediate_and_exclusive_transactions +.. _sqlite3-uri-tricks: + +SQLite URI tricks +----------------- + +Some useful URI tricks include: + +* Open a database in read-only mode:: + + con = sqlite3.connect("file:template.db?mode=ro", uri=True) + +* Do not implicitly create a new database file if it does not already exist; + will raise :exc:`~sqlite3.OperationalError` if unable to open the database file:: + + con = sqlite3.connect("file:nosuchdb.db?mode=rw", uri=True) + +* Create a shared named in-memory database:: + + con1 = sqlite3.connect("file:mem1?mode=memory&cache=shared", uri=True) + con2 = sqlite3.connect("file:mem1?mode=memory&cache=shared", uri=True) + con1.execute("create table t(t)") + con1.execute("insert into t values(28)") + con1.commit() + rows = con2.execute("select * from t").fetchall() + +More information about this feature, including a list of parameters, +can be found in the `SQLite URI documentation`_. + +.. 
_SQLite URI documentation: https://www.sqlite.org/uri.html + Using :mod:`sqlite3` efficiently -------------------------------- From e12e04ce98a91d02cc25d2a4ea870e3f32fe5a6c Mon Sep 17 00:00:00 2001 From: Sidney Markowitz Date: Thu, 7 Jul 2022 20:39:23 +1200 Subject: [PATCH 6/7] gh-94606: Condense unit tests for issue into one parameterized test --- Lib/test/test_email/test_message.py | 65 +++++++++++++---------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 3d1d526476e6dd..08ba82f330ad76 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -746,6 +746,35 @@ def test_iter_attachments_mutation(self): self.assertEqual(len(list(m.iter_attachments())), 2) self.assertEqual(m.get_payload(), orig) + get_payload_surrogate_params = { + + 'good_surrogateescape': ( + "String that can be encod\udcc3\udcabd with surrogateescape", + b'String that can be encod\xc3\xabd with surrogateescape' + ), + + 'string_with_utf8': ( + "String with utf-8 charactër", + b'String with utf-8 charact\xebr' + ), + + 'surrogate_and_utf8': ( + "String that cannot be ëncod\udcc3\udcabd with surrogateescape", + b'String that cannot be \xebncod\\udcc3\\udcabd with surrogateescape' + ), + + 'out_of_range_surrogate': ( + "String with \udfff cannot be encoded with surrogateescape", + b'String with \\udfff cannot be encoded with surrogateescape' + ), + } + + def get_payload_surrogate_as_gh_94606(self, msg, expected): + """test for GH issue 94606""" + m = self._str_msg(msg) + payload = m.get_payload(decode=True) + self.assertEqual(expected, payload) + class TestEmailMessage(TestEmailMessageBase, TestEmailBase): message = EmailMessage @@ -954,42 +983,6 @@ def test_get_body_malformed(self): # AttributeError: 'str' object has no attribute 'is_attachment' m.get_body() - def test_get_payload_unicode_surrogate1(self): - """test that fix for GH issue 94606 does not break 
this""" - msg = "String that could have been decod\udcc3\udcabd with surrogateescape" - expected = b'String that could have been decod\xc3\xabd with surrogateescape' - m = self._str_msg(msg) - payload = m.get_payload(decode=True) - self.assertEqual(expected, payload) - - def test_get_payload_unicode_surrogate2(self): - """test that fix for GH issue 94606 does not break this""" - msg = "Unicode string with a utf-8 charactër" - expected = b'Unicode string with a utf-8 charact\xebr' - m = self._str_msg(msg) - payload = m.get_payload(decode=True) - self.assertEqual(expected, payload) - - def test_get_payload_unicode_surrogate3(self): - """test for GH issue 94606""" - msg = "String that could not have been dëcod\udcc3\udcabd with surrogateescape" - expected = b'String that could not have been d\xebcod\\udcc3\udcabd with surrogateescape' - m = self._str_msg(msg) - # In GH issue 94606, this would raise - # UnicodeEncodeError: 'ascii' codec can't encode character '\xeb' in position 33: ordinal not in range(128) - payload = m.get_payload(decode=True) - self.assertEqual(expected, payload) - - def test_get_payload_unicode_surrogate4(self): - """test for GH issue 94606""" - msg = "Different reason \udfff could not have been decoded with surrogateescape" - expected = b'Different reason \\udfff could not have been decoded with surrogateescape' - m = self._str_msg(msg) - # In GH issue 94606, this would raise - # UnicodeEncodeError: 'ascii' codec can't encode character '\udfff' in position 17: ordinal not in range(128) - payload = m.get_payload(decode=True) - self.assertEqual(expected, payload) - class TestMIMEPart(TestEmailMessageBase, TestEmailBase): # Doing the full test run here may seem a bit redundant, since the two From 36619d5871c5eb192ee6cf6eaaaaaf4bf9f0e989 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 11 Dec 2023 17:17:10 +0200 Subject: [PATCH 7/7] Optimization. 
--- Lib/email/message.py | 29 +++++++++++++++-------------- Lib/email/utils.py | 16 +--------------- 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/Lib/email/message.py b/Lib/email/message.py index 951b4ecf1230cf..fe769580fed5d0 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -289,25 +289,26 @@ def get_payload(self, i=None, decode=False): # cte might be a Header, so for now stringify it. cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. - if isinstance(payload, str): - if utils._has_decoded_with_surrogateescape(payload): - bpayload = payload.encode('ascii', 'surrogateescape') - if not decode: + if not decode: + if isinstance(payload, str) and utils._has_surrogates(payload): + try: + bpayload = payload.encode('ascii', 'surrogateescape') try: payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') - elif decode: - try: - bpayload = payload.encode('ascii') - except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII code points in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') - if not decode: + except UnicodeEncodeError: + pass return payload + if isinstance(payload, str): + try: + bpayload = payload.encode('ascii', 'surrogateescape') + except UnicodeEncodeError: + # This won't happen for RFC compliant messages (messages + # containing only ASCII code points in the unicode input). + # If it does happen, turn the string into bytes in a way + # guaranteed not to fail. 
+ bpayload = payload.encode('raw-unicode-escape') if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 2a19da94132617..9175f2fdb6e69e 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -47,27 +47,13 @@ def _has_surrogates(s): """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 # (Python's default encoding) can encode any string. This is the fastest - # way to check for surrogates, see issue 11454 (moved to GH 55663) for timings. - # This will pass some strings that are not valid for surrogateescape encoding. + # way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings. try: s.encode() return False except UnicodeEncodeError: return True -def _has_decoded_with_surrogateescape(s): - """Return True if s is a valid str decoded using surrogateescape""" - # Slower test than _has_surrogates to be used when the string must - # be encodable with surrogateescape, but is no slower if the string - # does not have any unicode surrogate characters. - if _has_surrogates(s): - try: - s.encode('ascii', 'surrogateescape') - except UnicodeEncodeError: - return False - return True - return False - # How to deal with a string containing bytes before handing it to the # application through the 'normal' interface. def _sanitize(string):