Skip to content

Commit 5a6d123

Browse files
authored
[3.11] gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (GH-108657) (#108674)
(cherry picked from commit 400a1ce)
1 parent a2c05a4 commit 5a6d123

File tree

3 files changed

+41
-2
lines changed

3 files changed

+41
-2
lines changed

Lib/sqlite3/dump.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,28 @@
77
# future enhancements, you should normally quote any identifier that
88
# is an English language word, even if you do not have to."
99

10+
11+
from contextlib import contextmanager
12+
13+
14+
def _force_decode(bs, *args, **kwargs):
15+
# gh-108590: Don't fail if the database contains invalid Unicode data.
16+
try:
17+
return bs.decode(*args, **kwargs)
18+
except UnicodeDecodeError:
19+
return "".join([chr(c) for c in bs])
20+
21+
22+
@contextmanager
23+
def _text_factory(con, factory):
24+
saved_factory = con.text_factory
25+
con.text_factory = factory
26+
try:
27+
yield
28+
finally:
29+
con.text_factory = saved_factory
30+
31+
1032
def _iterdump(connection):
1133
"""
1234
Returns an iterator to the dump of the database in an SQL text format.
@@ -63,8 +85,9 @@ def _iterdump(connection):
6385
table_name_ident,
6486
",".join("""'||quote("{0}")||'""".format(col.replace('"', '""')) for col in column_names))
6587
query_res = cu.execute(q)
66-
for row in query_res:
67-
yield("{0};".format(row[0]))
88+
with _text_factory(connection, bytes):
89+
for row in query_res:
90+
yield("{0};".format(_force_decode(row[0])))
6891

6992
# Now when the type is 'index', 'trigger', or 'view'
7093
q = """

Lib/test/test_sqlite3/test_dump.py

+15
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,21 @@ def test_dump_virtual_tables(self):
137137
actual = list(self.cx.iterdump())
138138
self.assertEqual(expected, actual)
139139

140+
def test_dump_unicode_invalid(self):
    # gh-108590: the TEXT value X'619f' is "a" followed by a byte that is
    # not valid UTF-8 on its own.
    self.cu.executescript("""
        CREATE TABLE foo (data TEXT);
        INSERT INTO foo VALUES (CAST(X'619f' AS TEXT));
    """)
    # The offending byte is expected to come through as the code point
    # with the same ordinal (U+009F).
    expected = [
        "BEGIN TRANSACTION;",
        "CREATE TABLE foo (data TEXT);",
        "INSERT INTO \"foo\" VALUES('a\x9f');",
        "COMMIT;",
    ]
    actual = list(self.cx.iterdump())
    self.assertEqual(expected, actual)
154+
140155

141156
if __name__ == "__main__":
142157
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson.

0 commit comments

Comments
 (0)