[mypyc] Add tests for chr(), ord(), encode() and decode() (#10914)

97littleleaf11 · web-flow · commit 68a67aedb072 · 2021-08-05T13:52:47.000+01:00
Add several run tests:

* chr()
* ord()
* decode()
* encode()
diff --git a/mypyc/test-data/fixtures/ir.py b/mypyc/test-data/fixtures/ir.py
@@ -74,10 +74,11 @@ def split(self, sep: Optional[str] = None, max: Optional[int] = None) -> List[st
     def strip (self, item: str) -> str: pass
     def join(self, x: Iterable[str]) -> str: pass
     def format(self, *args: Any, **kwargs: Any) -> str: ...
-    def upper(self) -> str: pass
-    def startswith(self, x: str, start: int=..., end: int=...) -> bool: pass
-    def endswith(self, x: str, start: int=..., end: int=...) -> bool: pass
-    def replace(self, old: str, new: str, maxcount: Optional[int] = None) -> str: pass
+    def upper(self) -> str: ...
+    def startswith(self, x: str, start: int=..., end: int=...) -> bool: ...
+    def endswith(self, x: str, start: int=..., end: int=...) -> bool: ...
+    def replace(self, old: str, new: str, maxcount: int=...) -> str: ...
+    def encode(self, x: str=..., y: str=...) -> bytes: ...
 
 class float:
     def __init__(self, x: object) -> None: pass
@@ -97,14 +98,15 @@ def __neg__(self) -> complex: pass
 
 class bytes:
     @overload
-    def __init__(self) -> None: pass
+    def __init__(self) -> None: ...
     @overload
-    def __init__(self, x: object) -> None: pass
-    def __add__(self, x: bytes) -> bytes: pass
-    def __eq__(self, x: object) -> bool: pass
-    def __ne__(self, x: object) -> bool: pass
-    def __getitem__(self, i: int) -> int: pass
-    def join(self, x: Iterable[object]) -> bytes: pass
+    def __init__(self, x: object) -> None: ...
+    def __add__(self, x: bytes) -> bytes: ...
+    def __eq__(self, x: object) -> bool: ...
+    def __ne__(self, x: object) -> bool: ...
+    def __getitem__(self, i: int) -> int: ...
+    def join(self, x: Iterable[object]) -> bytes: ...
+    def decode(self, x: str, y: str=...) -> str: ...
 
 class bytearray:
     @overload
@@ -253,6 +255,10 @@ class IndexError(LookupError): pass
 
 class RuntimeError(Exception): pass
 
+class UnicodeEncodeError(RuntimeError): pass  # NOTE(review): CPython derives this from UnicodeError (a ValueError) - confirm RuntimeError is a deliberate simplification
+
+class UnicodeDecodeError(RuntimeError): pass  # NOTE(review): CPython derives this from UnicodeError (a ValueError) - confirm RuntimeError is a deliberate simplification
+
 class NotImplementedError(RuntimeError): pass
 
 class StopIteration(Exception):
@@ -284,6 +290,8 @@ def abs(x: float) -> float: ...
 def exit() -> None: ...
 def repr(o: object) -> str: ...
 def ascii(o: object) -> str: ...
+def ord(o: object) -> int: ...  # parameter typed as object; the run tests call it with both str and bytes
+def chr(i: int) -> str: ...  # the run tests expect ValueError for -1 and 1114112, success for 0 and 1114111
 
 # Dummy definitions.
 class classmethod: pass
diff --git a/mypyc/test-data/run-strings.test b/mypyc/test-data/run-strings.test
@@ -1,6 +1,6 @@
 # Test cases for strings (compile and run)
 
-[case testStr]
+[case testStrBasics]
 from typing import Tuple
 def f() -> str:
     return 'some string'
@@ -511,3 +511,109 @@ def test_format_method_python_doc() -> None:
                         '    9    9   11 1001',\
                         '   10    A   12 1010',\
                         '   11    B   13 1011']
+
+[case testChrOrdEncodeDecode]
+# Some test cases are from https://docs.python.org/3/howto/unicode.html
+
+def try_invalid(x: int) -> bool:  # True if chr(x) raises ValueError, False if it succeeds
+    try:
+        chr(x + int())  # int() is 0, so the value passed is x; presumably forces a non-literal argument - confirm
+        return False
+    except ValueError:
+        return True
+
+def test_chr() -> None:  # chr() on literal arguments first, then on non-literal expressions (variable + int())
+    assert chr(57344) == '\ue000'
+    assert chr(0) == '\x00'
+    assert chr(65) == 'A'
+    assert chr(150) == '\x96'
+    try:
+        chr(-1)  # negative argument must be rejected
+        assert False
+    except ValueError:
+        pass
+    try:
+        chr(1114112)  # one past 1114111, the largest value accepted below
+        assert False
+    except ValueError:
+        pass
+    assert chr(1114111) == '\U0010ffff'
+    x = 0
+    assert chr(x + int()) == '\x00'  # int() is 0, so the argument equals x
+    x = 100
+    assert chr(x + int()) == 'd'
+    x = 150
+    assert chr(x + int()) == '\x96'
+    x = 257
+    assert chr(x + int()) == 'ā'  # first value here above 255
+    x = 65537
+    assert chr(x + int()) == '𐀁'  # value above 65535 (0xFFFF)
+    assert try_invalid(-1)  # same out-of-range values, passed through a function parameter
+    assert try_invalid(1114112)
+
+def test_ord() -> None:  # ord() on str and bytes arguments; a two-character str must raise TypeError
+    assert ord('\ue000') == 57344
+    s = "a\xac\u1234\u20ac\U00008000"
+    #     ^^^^ two-digit hex escape
+    #         ^^^^^^ four-digit Unicode escape
+    #                     ^^^^^^^^^^ eight-digit Unicode escape
+    l1 = [ord(c) for c in s]
+    assert l1 == [97, 172, 4660, 8364, 32768]
+    u = 'abcdé'
+    assert ord(u[-1]) == 233  # last character is 'é'
+    assert ord(b'a') == 97
+    assert ord(b'a' + bytes()) == 97  # bytes() is empty, so this is b'a' produced by concatenation
+    u2 = '\U0010ffff'
+    assert ord(u2) == 1114111
+    try:
+        ord('aa')  # more than one character is an error
+        assert False
+    except TypeError:
+        pass
+
+def test_decode() -> None:
+    assert "\N{GREEK CAPITAL LETTER DELTA}" == '\u0394'
+    assert "\u0394" == "\u0394"
+    assert "\U00000394" == '\u0394'
+    assert b'\x80abc'.decode("utf-8", "replace") == '\ufffdabc'
+    assert b'\x80abc'.decode("utf-8", "backslashreplace") == '\\x80abc'
+    assert b'\x80abc'.decode("utf-8", "ignore") == 'abc'
+    assert b'\x80abc'.decode("UTF-8", "ignore") == 'abc'
+    assert b'\x80abc'.decode("Utf-8", "ignore") == 'abc'
+    assert b'\x80abc'.decode("utf_8", "ignore") == 'abc'
+    assert b'\x80abc'.decode("latin1", "ignore") == '\x80abc'
+    assert b'\xd2\xbb\xb6\xfe\xc8\xfd'.decode("gbk", "ignore") == '一二三'
+    assert b'\xd2\xbb\xb6\xfe\xc8\xfd'.decode("latin1", "ignore") == 'Ò»¶þÈý'
+    assert b'Z\xc3\xbcrich'.decode("utf-8") == 'Zürich'
+    try:
+        b'Z\xc3\xbcrich'.decode("ascii")
+        assert False
+    except UnicodeDecodeError:
+        pass
+
+def test_encode() -> None:  # str.encode() with default and explicit codecs and several error handlers
+    u = chr(40960) + 'abcd' + chr(1972)  # '\ua000' + 'abcd' + '\u07b4', as the backslashreplace case shows
+    assert u.encode() == b'\xea\x80\x80abcd\xde\xb4'  # no arguments: same bytes as explicit 'utf-8' below
+    assert u.encode('utf-8') == b'\xea\x80\x80abcd\xde\xb4'
+    try:
+        u.encode('ascii')  # no error handler given, so the non-ASCII characters must raise
+        assert False
+    except UnicodeEncodeError:
+        pass
+    assert u.encode('ascii', 'ignore') == b'abcd'
+    assert u.encode('ASCII', 'ignore') == b'abcd'  # codec name in upper case
+    assert u.encode('ascii', 'replace') == b'?abcd?'
+    assert u.encode('ascii', 'xmlcharrefreplace') == b'&#40960;abcd&#1972;'
+    assert u.encode('ascii', 'backslashreplace') == b'\\ua000abcd\\u07b4'
+    assert u.encode('ascii', 'namereplace') == b'\\N{YI SYLLABLE IT}abcd\\u07b4'  # second character comes out as a \u escape, presumably because it has no name
+    assert 'pythön!'.encode() == b'pyth\xc3\xb6n!'
+    assert '一二三'.encode('gbk') == b'\xd2\xbb\xb6\xfe\xc8\xfd'
+    assert u.encode('UTF-8', 'ignore') == b'\xea\x80\x80abcd\xde\xb4'  # nothing to ignore: result matches plain utf-8
+    assert u.encode('Utf_8') == b'\xea\x80\x80abcd\xde\xb4'  # mixed case with underscore
+    assert u.encode('UTF_8') == b'\xea\x80\x80abcd\xde\xb4'  # upper case with underscore
+    assert u'\u00E1'.encode('latin1') == b'\xe1'
+    try:
+        u.encode('latin1')  # expected to fail: the test requires UnicodeEncodeError for u under latin1
+        assert False
+    except UnicodeEncodeError:
+        pass