|
1 | 1 | # Test cases for strings (compile and run)
|
2 | 2 |
|
3 |
| -[case testStr] |
| 3 | +[case testStrBasics] |
4 | 4 | from typing import Tuple
|
5 | 5 | def f() -> str:
|
6 | 6 | return 'some string'
|
@@ -511,3 +511,109 @@ def test_format_method_python_doc() -> None:
|
511 | 511 | ' 9 9 11 1001',\
|
512 | 512 | ' 10 A 12 1010',\
|
513 | 513 | ' 11 B 13 1011']
|
| 514 | + |
| 515 | +[case testChrOrdEncodeDecode] |
| 516 | +# Some test cases are from https://docs.python.org/3/howto/unicode.html |
| 517 | + |
def try_invalid(x: int) -> bool:
    """Report whether ``chr`` rejects ``x`` as a code point.

    Args:
        x: Candidate code point.

    Returns:
        True if ``chr`` raises ``ValueError`` for ``x``, False if the call
        succeeds.
    """
    try:
        # int() evaluates to 0, so the value passed to chr() is still x.
        # NOTE(review): presumably the addition keeps the argument a
        # computed expression for the compiler under test -- confirm.
        chr(x + int())
        return False
    except ValueError:
        return True
| 524 | + |
def test_chr() -> None:
    """Check ``chr`` on literal and computed arguments, including range errors.

    Valid code points run from 0 through 1114111 (0x10FFFF); -1 and 1114112
    must raise ``ValueError``.
    """
    # Literal arguments.
    assert chr(57344) == '\ue000'
    assert chr(0) == '\x00'
    assert chr(65) == 'A'
    assert chr(150) == '\x96'
    try:
        chr(-1)
        assert False
    except ValueError:
        pass
    try:
        chr(1114112)
        assert False
    except ValueError:
        pass
    assert chr(1114111) == '\U0010ffff'
    # Computed arguments: int() is 0, so each call sees the value of x.
    # NOTE(review): presumably `+ int()` is there so the argument is not a
    # plain literal for the compiler under test -- confirm.
    x = 0
    assert chr(x + int()) == '\x00'
    x = 100
    assert chr(x + int()) == 'd'
    x = 150
    assert chr(x + int()) == '\x96'
    x = 257
    assert chr(x + int()) == 'ā'
    x = 65537
    assert chr(x + int()) == '𐀁'
    # Out-of-range values passed through a function parameter.
    assert try_invalid(-1)
    assert try_invalid(1114112)
| 553 | + |
def test_ord() -> None:
    """Check ``ord`` on str and bytes inputs and on an over-long string."""
    assert ord('\ue000') == 57344
    s = "a\xac\u1234\u20ac\U00008000"
    #     ^^^^ two-digit hex escape
    #         ^^^^^^ four-digit Unicode escape
    #                     ^^^^^^^^^^ eight-digit Unicode escape
    l1 = [ord(c) for c in s]
    assert l1 == [97, 172, 4660, 8364, 32768]
    u = 'abcdé'
    assert ord(u[-1]) == 233
    # Length-1 bytes objects are accepted as well; the second form builds
    # the operand at run time by concatenating an empty bytes object.
    assert ord(b'a') == 97
    assert ord(b'a' + bytes()) == 97
    u2 = '\U0010ffff'
    assert ord(u2) == 1114111
    # A string longer than one character must be rejected.
    try:
        ord('aa')
        assert False
    except TypeError:
        pass
| 573 | + |
def test_decode() -> None:
    """Check ``bytes.decode`` across codecs, codec-name spellings and error modes."""
    # Three spellings of the same character (U+0394).
    assert "\N{GREEK CAPITAL LETTER DELTA}" == '\u0394'
    assert "\u0394" == "\u0394"
    assert "\U00000394" == '\u0394'
    # b'\x80' is not valid UTF-8; each error handler treats it differently.
    assert b'\x80abc'.decode("utf-8", "replace") == '\ufffdabc'
    assert b'\x80abc'.decode("utf-8", "backslashreplace") == '\\x80abc'
    assert b'\x80abc'.decode("utf-8", "ignore") == 'abc'
    # Alternative spellings of the UTF-8 codec name.
    assert b'\x80abc'.decode("UTF-8", "ignore") == 'abc'
    assert b'\x80abc'.decode("Utf-8", "ignore") == 'abc'
    assert b'\x80abc'.decode("utf_8", "ignore") == 'abc'
    # latin1 maps every byte to a character, so nothing is dropped.
    assert b'\x80abc'.decode("latin1", "ignore") == '\x80abc'
    # The same bytes decoded with two different codecs.
    assert b'\xd2\xbb\xb6\xfe\xc8\xfd'.decode("gbk", "ignore") == '一二三'
    assert b'\xd2\xbb\xb6\xfe\xc8\xfd'.decode("latin1", "ignore") == 'Ò»¶þÈý'
    assert b'Z\xc3\xbcrich'.decode("utf-8") == 'Zürich'
    # Non-ASCII bytes with the default (strict) error handling must raise.
    try:
        b'Z\xc3\xbcrich'.decode("ascii")
        assert False
    except UnicodeDecodeError:
        pass
| 593 | + |
def test_encode() -> None:
    """Check ``str.encode`` across codecs, codec-name spellings and error modes."""
    # U+A000 and U+07B4 surround plain ASCII, so non-UTF-8 codecs must
    # either fail or apply an error handler to the two outer characters.
    u = chr(40960) + 'abcd' + chr(1972)
    assert u.encode() == b'\xea\x80\x80abcd\xde\xb4'
    assert u.encode('utf-8') == b'\xea\x80\x80abcd\xde\xb4'
    try:
        u.encode('ascii')
        assert False
    except UnicodeEncodeError:
        pass
    assert u.encode('ascii', 'ignore') == b'abcd'
    assert u.encode('ASCII', 'ignore') == b'abcd'
    assert u.encode('ascii', 'replace') == b'?abcd?'
    # xmlcharrefreplace emits decimal character references; the expected
    # value must stay as literal ASCII text, not the characters themselves.
    assert u.encode('ascii', 'xmlcharrefreplace') == b'&#40960;abcd&#1972;'
    assert u.encode('ascii', 'backslashreplace') == b'\\ua000abcd\\u07b4'
    # namereplace emits \N{...} for the first character and a \u escape
    # for the last one.
    assert u.encode('ascii', 'namereplace') == b'\\N{YI SYLLABLE IT}abcd\\u07b4'
    assert 'pythön!'.encode() == b'pyth\xc3\xb6n!'
    assert '一二三'.encode('gbk') == b'\xd2\xbb\xb6\xfe\xc8\xfd'
    # Alternative spellings of the UTF-8 codec name.
    assert u.encode('UTF-8', 'ignore') == b'\xea\x80\x80abcd\xde\xb4'
    assert u.encode('Utf_8') == b'\xea\x80\x80abcd\xde\xb4'
    assert u.encode('UTF_8') == b'\xea\x80\x80abcd\xde\xb4'
    assert u'\u00E1'.encode('latin1') == b'\xe1'
    # Both outer characters lie outside latin1, so strict encoding fails.
    try:
        u.encode('latin1')
        assert False
    except UnicodeEncodeError:
        pass
0 commit comments