Skip to content

Commit 365a6cb

Browse files
committed
gh-98433: Fix quadratic time idna decoding.
There was an unnecessary quadratic loop in idna decoding. This restores the behavior to linear. An early length check would still be a good idea given that DNS IDNA label names cannot be more than 63 ASCII characters.
1 parent 016c7d3 commit 365a6cb

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

Lib/encodings/idna.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def nameprep(label):
3939

4040
# Check bidi
4141
RandAL = [stringprep.in_table_d1(x) for x in label]
42+
any_in_table_d2 = any(stringprep.in_table_d2(x) for x in label)
4243
for c in RandAL:
4344
if c:
4445
# There is a RandAL char in the string. Must perform further
@@ -47,7 +48,7 @@ def nameprep(label):
4748
# This is table C.8, which was already checked
4849
# 2) If a string contains any RandALCat character, the string
4950
# MUST NOT contain any LCat character.
50-
if any(stringprep.in_table_d2(x) for x in label):
51+
if any_in_table_d2:
5152
raise UnicodeError("Violation of BIDI requirement 2")
5253

5354
# 3) If a string contains any RandALCat character, a

Lib/test/test_codecs.py

+16
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import io
44
import locale
55
import sys
6+
import time
67
import unittest
78
import encodings
89
from unittest import mock
@@ -1552,6 +1553,21 @@ def test_builtin_encode(self):
15521553
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
15531554
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
15541555

1556+
def test_builtin_decode_length_limit(self):
1557+
get_time = time.process_time
1558+
if get_time() <= 0: # some platforms like WASM lack process_time()
1559+
get_time = time.monotonic
1560+
# This was slow prior to GH-98433's quadratic loop being fixed.
1561+
# Before: 12s on a rpi4 --with-pydebug. After: 0.12s
1562+
with self.assertRaises(UnicodeError) as ctx:
1563+
start = get_time()
1564+
(b"xn--016c"+b"a"*1000).decode("idna")
1565+
seconds_to_decode_idna_length_fail = get_time() - start
1566+
self.assertIn("too long", str(ctx.exception))
1567+
self.assertLess(
1568+
elapsed_seconds, 4,
1569+
msg="idna decoding length failure took waaaay too long")
1570+
15551571
def test_stream(self):
15561572
r = codecs.getreader("idna")(io.BytesIO(b"abc"))
15571573
r.read(3)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The IDNA codec decoder used on DNS hostnames no longer involves a quadratic
2+
algorithm. This prevents a potential CPU denial of service if an out-of-spec
3+
excessive length hostname involving bidirectional characters is decoded.

0 commit comments

Comments
 (0)