diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index ea4058512fe366..686a8382ee960c 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -63,14 +63,15 @@ def ToASCII(label): try: # Step 1: try ASCII label = label.encode("ascii") - except UnicodeError: + except UnicodeEncodeError: pass else: # Skip to step 3: UseSTD3ASCIIRules is false, so # Skip to step 8. if 0 < len(label) < 64: return label - raise UnicodeError("label empty or too long") + raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")), + "label empty or too long") # Step 2: nameprep label = nameprep(label) @@ -79,17 +80,18 @@ def ToASCII(label): # Step 4: try ASCII try: label = label.encode("ascii") - except UnicodeError: + except UnicodeEncodeError: pass else: # Skip to step 8. if 0 < len(label) < 64: return label - raise UnicodeError("label empty or too long") + raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")), + "label empty or too long") # Step 5: Check ACE prefix if label.startswith(sace_prefix): - raise UnicodeError("Label starts with ACE prefix") + raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label starts with ACE prefix") # Step 6: Encode with PUNYCODE label = label.encode("punycode") @@ -100,7 +102,9 @@ def ToASCII(label): # Step 8: Check size if 0 < len(label) < 64: return label - raise UnicodeError("label empty or too long") + raise UnicodeEncodeError("punycode", label.decode("punycode"), 0, + len(label.decode("punycode")), "label empty or too long") + def ToUnicode(label): # Step 1: Check for ASCII @@ -110,7 +114,7 @@ def ToUnicode(label): try: label = label.encode("ascii") pure_ascii = True - except UnicodeError: + except UnicodeEncodeError: pure_ascii = False if not pure_ascii: # Step 2: Perform nameprep @@ -118,8 +122,10 @@ def ToUnicode(label): # It doesn't say this, but apparently, it should be ASCII now try: label = label.encode("ascii") - except UnicodeError: - raise 
UnicodeError("Invalid character in IDN label") + except UnicodeEncodeError: + raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")), + "Invalid character in IDN label") + # Step 3: Check for ACE prefix if not label.startswith(ace_prefix): return str(label, "ascii") @@ -162,9 +168,11 @@ def encode(self, input, errors='strict'): labels = result.split(b'.') for label in labels[:-1]: if not (0 < len(label) < 64): - raise UnicodeError("label empty or too long") + raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")), + "label empty or too long") if len(labels[-1]) >= 64: - raise UnicodeError("label too long") + raise UnicodeEncodeError("ascii", labels[-1].decode("ascii"), 0, len(labels[-1].decode("ascii")), + "label too long") return result, len(input) result = bytearray() diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index 1c5726447077b1..e3438e895769a7 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -134,7 +134,8 @@ def decode_generalized_number(extended, extpos, bias, errors): char = ord(extended[extpos]) except IndexError: if errors == "strict": - raise UnicodeError("incomplete punicode string") + raise UnicodeDecodeError("punycode", bytes(extended[extpos], "utf-8"), extpos, extpos+1, + "incomplete punycode string") return extpos + 1, None extpos += 1 if 0x41 <= char <= 0x5A: # A-Z @@ -142,8 +143,8 @@ def decode_generalized_number(extended, extpos, bias, errors): elif 0x30 <= char <= 0x39: digit = char - 22 # 0x30-26 elif errors == "strict": - raise UnicodeError("Invalid extended code point '%s'" - % extended[extpos-1]) + raise UnicodeDecodeError("punycode", bytes(extended[extpos-1], "utf-8"), extpos-1, extpos, + "Invalid extended code point '%s'" % extended[extpos-1]) else: return extpos, None t = T(j, bias) @@ -171,7 +172,7 @@ def insertion_sort(base, extended, errors): char += pos // (len(base) + 1) if char > 0x10FFFF: if errors == "strict": - raise 
UnicodeError("Invalid character U+%x" % char) + raise UnicodeDecodeError("punycode", bytes(char, "utf-8"), 0, len(char), "Invalid character U+%x" % char) char = ord('?') pos = pos % (len(base) + 1) base = base[:pos] + chr(char) + base[pos:] diff --git a/Lib/encodings/undefined.py b/Lib/encodings/undefined.py index 4690288355c710..8f206227fbb5db 100644 --- a/Lib/encodings/undefined.py +++ b/Lib/encodings/undefined.py @@ -1,6 +1,6 @@ """ Python 'undefined' Codec - This codec will always raise a ValueError exception when being + This codec will always raise a UnicodeEncodeError | UnicodeDecodeError exception when being used. It is intended for use by the site.py file to switch off automatic string to Unicode coercion. @@ -16,18 +16,18 @@ class Codec(codecs.Codec): def encode(self,input,errors='strict'): - raise UnicodeError("undefined encoding") + raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding") def decode(self,input,errors='strict'): - raise UnicodeError("undefined encoding") + raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding") class IncrementalEncoder(codecs.IncrementalEncoder): def encode(self, input, final=False): - raise UnicodeError("undefined encoding") + raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding") class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): - raise UnicodeError("undefined encoding") + raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding") class StreamWriter(Codec,codecs.StreamWriter): pass diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py index c61248242be8c7..baaf9f6c4d8283 100644 --- a/Lib/encodings/utf_16.py +++ b/Lib/encodings/utf_16.py @@ -64,7 +64,7 @@ def _buffer_decode(self, input, errors, final): elif byteorder == 1: self.decoder = codecs.utf_16_be_decode elif consumed >= 2: - raise UnicodeError("UTF-16 stream does not start with BOM") + 
raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM") return (output, consumed) return self.decoder(input, self.errors, final) @@ -138,7 +138,7 @@ def decode(self, input, errors='strict'): elif byteorder == 1: self.decode = codecs.utf_16_be_decode elif consumed>=2: - raise UnicodeError("UTF-16 stream does not start with BOM") + raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM") return (object, consumed) ### encodings module API diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py index cdf84d14129a62..c4c1e2ccfa5279 100644 --- a/Lib/encodings/utf_32.py +++ b/Lib/encodings/utf_32.py @@ -59,7 +59,7 @@ def _buffer_decode(self, input, errors, final): elif byteorder == 1: self.decoder = codecs.utf_32_be_decode elif consumed >= 4: - raise UnicodeError("UTF-32 stream does not start with BOM") + raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM") return (output, consumed) return self.decoder(input, self.errors, final) @@ -133,7 +133,7 @@ def decode(self, input, errors='strict'): elif byteorder == 1: self.decode = codecs.utf_32_be_decode elif consumed>=4: - raise UnicodeError("UTF-32 stream does not start with BOM") + raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM") return (object, consumed) ### encodings module API diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 54a3520802a4f3..9f856dcee3598d 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1334,13 +1334,13 @@ def test_decode(self): def test_decode_invalid(self): testcases = [ - (b"xn--w&", "strict", UnicodeError()), + (b"xn--w&", "strict", UnicodeDecodeError("punycode", b"xn--w&", 0, 0, "")), (b"xn--w&", "ignore", "xn-"), ] for puny, errors, expected in testcases: with self.subTest(puny=puny, errors=errors): if isinstance(expected, Exception): - self.assertRaises(UnicodeError, puny.decode, "punycode", errors) + 
self.assertRaises(UnicodeDecodeError, puny.decode, "punycode", errors) else: self.assertEqual(puny.decode("punycode", errors), expected) diff --git a/Misc/NEWS.d/next/Library/2020-06-26-21-36-00.bpo-41115.DpUba5.rst b/Misc/NEWS.d/next/Library/2020-06-26-21-36-00.bpo-41115.DpUba5.rst new file mode 100644 index 00000000000000..9a8809ae51d863 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-26-21-36-00.bpo-41115.DpUba5.rst @@ -0,0 +1,2 @@ +Modified the idna, punycode, undefined, utf_16 and utf_32 codecs to raise UnicodeEncodeError or UnicodeDecodeError rather than a bare +UnicodeError. Patch by Utkarsh Pandey.