python · utkarsh261 · Jun 26, 2020 · Jun 26, 2020
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
@@ -63,14 +63,15 @@ def ToASCII(label):
     try:
         # Step 1: try ASCII
         label = label.encode("ascii")
-    except UnicodeError:
+    except UnicodeEncodeError:
         pass
     else:
         # Skip to step 3: UseSTD3ASCIIRules is false, so
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeError("label empty or too long")
+        raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
+                                 "label empty or too long")
 
     # Step 2: nameprep
     label = nameprep(label)
@@ -79,17 +80,18 @@ def ToASCII(label):
     # Step 4: try ASCII
     try:
         label = label.encode("ascii")
-    except UnicodeError:
+    except UnicodeEncodeError:
         pass
     else:
         # Skip to step 8.
         if 0 < len(label) < 64:
             return label
-        raise UnicodeError("label empty or too long")
+        raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
+                                 "label empty or too long")
 
     # Step 5: Check ACE prefix
     if label.startswith(sace_prefix):
-        raise UnicodeError("Label starts with ACE prefix")
+        raise UnicodeEncodeError("ascii", str(label), 0, len(label), "label starts with ACE prefix")
 
     # Step 6: Encode with PUNYCODE
     label = label.encode("punycode")
@@ -100,7 +102,9 @@ def ToASCII(label):
     # Step 8: Check size
     if 0 < len(label) < 64:
         return label
-    raise UnicodeError("label empty or too long")
+    raise UnicodeEncodeError("punycode", label.decode("punycode"), 0,
+                             len(label.decode("punycode")), "label empty or too long")
+
 
 def ToUnicode(label):
     # Step 1: Check for ASCII
@@ -110,16 +114,18 @@ def ToUnicode(label):
         try:
             label = label.encode("ascii")
             pure_ascii = True
-        except UnicodeError:
+        except UnicodeEncodeError:
             pure_ascii = False
     if not pure_ascii:
         # Step 2: Perform nameprep
         label = nameprep(label)
         # It doesn't say this, but apparently, it should be ASCII now
         try:
             label = label.encode("ascii")
-        except UnicodeError:
-            raise UnicodeError("Invalid character in IDN label")
+        except UnicodeEncodeError as exc:
+            raise UnicodeEncodeError("ascii", label, exc.start, exc.end,
+                                     "Invalid character in IDN label")
+
     # Step 3: Check for ACE prefix
     if not label.startswith(ace_prefix):
         return str(label, "ascii")
@@ -162,9 +168,11 @@ def encode(self, input, errors='strict'):
             labels = result.split(b'.')
             for label in labels[:-1]:
                 if not (0 < len(label) < 64):
-                    raise UnicodeError("label empty or too long")
+                    raise UnicodeEncodeError("ascii", label.decode("ascii"), 0, len(label.decode("ascii")),
+                                             "label empty or too long")
             if len(labels[-1]) >= 64:
-                raise UnicodeError("label too long")
+                raise UnicodeEncodeError("ascii", labels[-1].decode("ascii"), 0, len(labels[-1].decode("ascii")),
+                                         "label too long")
             return result, len(input)
 
         result = bytearray()

diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
@@ -134,16 +134,17 @@ def decode_generalized_number(extended, extpos, bias, errors):
             char = ord(extended[extpos])
         except IndexError:
             if errors == "strict":
-                raise UnicodeError("incomplete punicode string")
+                raise UnicodeDecodeError("punycode", extended.encode("ascii", "replace"),
+                                         extpos, extpos+1, "incomplete punycode string")
             return extpos + 1, None
         extpos += 1
         if 0x41 <= char <= 0x5A: # A-Z
             digit = char - 0x41
         elif 0x30 <= char <= 0x39:
             digit = char - 22 # 0x30-26
         elif errors == "strict":
-            raise UnicodeError("Invalid extended code point '%s'"
-                               % extended[extpos-1])
+            raise UnicodeDecodeError("punycode", extended.encode("ascii", "replace"), extpos-1, extpos,
+                                     "Invalid extended code point '%s'" % extended[extpos-1])
         else:
             return extpos, None
         t = T(j, bias)
@@ -171,7 +172,7 @@ def insertion_sort(base, extended, errors):
         char += pos // (len(base) + 1)
         if char > 0x10FFFF:
             if errors == "strict":
-                raise UnicodeError("Invalid character U+%x" % char)
+                raise UnicodeDecodeError("punycode", extended.encode("ascii", "replace"), 0, len(extended), "Invalid character U+%x" % char)
             char = ord('?')
         pos = pos % (len(base) + 1)
         base = base[:pos] + chr(char) + base[pos:]

diff --git a/Lib/encodings/undefined.py b/Lib/encodings/undefined.py
@@ -1,6 +1,6 @@
 """ Python 'undefined' Codec
 
-    This codec will always raise a ValueError exception when being
+    This codec will always raise a UnicodeEncodeError or UnicodeDecodeError exception when being
     used. It is intended for use by the site.py file to switch off
     automatic string to Unicode coercion.
 
@@ -16,18 +16,18 @@
 class Codec(codecs.Codec):
 
     def encode(self,input,errors='strict'):
-        raise UnicodeError("undefined encoding")
+        raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding")
 
     def decode(self,input,errors='strict'):
-        raise UnicodeError("undefined encoding")
+        raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding")
 
 class IncrementalEncoder(codecs.IncrementalEncoder):
     def encode(self, input, final=False):
-        raise UnicodeError("undefined encoding")
+        raise UnicodeEncodeError("undefined", str(input), 0, len(input), "undefined encoding")
 
 class IncrementalDecoder(codecs.IncrementalDecoder):
     def decode(self, input, final=False):
-        raise UnicodeError("undefined encoding")
+        raise UnicodeDecodeError("undefined", bytes(input), 0, len(input), "undefined decoding")
 
 class StreamWriter(Codec,codecs.StreamWriter):
     pass

diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
@@ -64,7 +64,7 @@ def _buffer_decode(self, input, errors, final):
             elif byteorder == 1:
                 self.decoder = codecs.utf_16_be_decode
             elif consumed >= 2:
-                raise UnicodeError("UTF-16 stream does not start with BOM")
+                raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM")
             return (output, consumed)
         return self.decoder(input, self.errors, final)
 
@@ -138,7 +138,7 @@ def decode(self, input, errors='strict'):
         elif byteorder == 1:
             self.decode = codecs.utf_16_be_decode
         elif consumed>=2:
-            raise UnicodeError("UTF-16 stream does not start with BOM")
+            raise UnicodeDecodeError("utf-16", input, 0, 0, "UTF-16 stream does not start with BOM")
         return (object, consumed)
 
 ### encodings module API

diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py
@@ -59,7 +59,7 @@ def _buffer_decode(self, input, errors, final):
             elif byteorder == 1:
                 self.decoder = codecs.utf_32_be_decode
             elif consumed >= 4:
-                raise UnicodeError("UTF-32 stream does not start with BOM")
+                raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM")
             return (output, consumed)
         return self.decoder(input, self.errors, final)
 
@@ -133,7 +133,7 @@ def decode(self, input, errors='strict'):
         elif byteorder == 1:
             self.decode = codecs.utf_32_be_decode
         elif consumed>=4:
-            raise UnicodeError("UTF-32 stream does not start with BOM")
+            raise UnicodeDecodeError("utf-32", input, 0, 0, "UTF-32 stream does not start with BOM")
         return (object, consumed)
 
 ### encodings module API

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -1334,13 +1334,13 @@ def test_decode(self):
 
     def test_decode_invalid(self):
         testcases = [
-            (b"xn--w&", "strict", UnicodeError()),
+            (b"xn--w&", "strict", UnicodeDecodeError("punycode", b"xn--w&", 0, 0, "")),
             (b"xn--w&", "ignore", "xn-"),
         ]
         for puny, errors, expected in testcases:
             with self.subTest(puny=puny, errors=errors):
                 if isinstance(expected, Exception):
-                    self.assertRaises(UnicodeError, puny.decode, "punycode", errors)
+                    self.assertRaises(UnicodeDecodeError, puny.decode, "punycode", errors)
                 else:
                     self.assertEqual(puny.decode("punycode", errors), expected)
 

diff --git a/Misc/NEWS.d/next/Library/2020-06-26-21-36-00.bpo-41115.DpUba5.rst b/Misc/NEWS.d/next/Library/2020-06-26-21-36-00.bpo-41115.DpUba5.rst
@@ -0,0 +1,2 @@
+Modified source to raise UnicodeDecodeError or UnicodeEncodeError rather than
+a bare UnicodeError. Patch by Utkarsh Pandey.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Modified source to raise UnicodeDecodeError or UnicodeEncodeError rather than
		a bare UnicodeError. Patch by Utkarsh Pandey.