@@ -24,6 +24,14 @@ class Str(str):
2424 pass
2525
2626
# Native format identifiers used by the PyUnicode_AsNativeFormat() and
# PyUnicode_FromNativeFormat() tests in this file.
(
    PyUnicode_NATIVE_INVALID,  # 0: invalid native format
    PyUnicode_NATIVE_ASCII,    # 1
    PyUnicode_NATIVE_UCS1,     # 2
    PyUnicode_NATIVE_UCS2,     # 3
    PyUnicode_NATIVE_UCS4,     # 4
    PyUnicode_NATIVE_UTF8,     # 5
) = range(6)
34+
2735class CAPITest (unittest .TestCase ):
2836
2937 @support .cpython_only
@@ -1675,6 +1683,75 @@ def test_pep393_utf8_caching_bug(self):
16751683 # Check that the second call returns the same result
16761684 self .assertEqual (getargs_s_hash (s ), chr (k ).encode () * (i + 1 ))
16771685
1678-
1679- if __name__ == "__main__" :
def test_unicode_asnativeformat(self):
    # Test PyUnicode_AsNativeFormat()
    asnativeformat = _testlimitedcapi.unicode_asnativeformat

    # The expected UCS2/UCS4 bytes are built with the host byte order.
    little_endian = (sys.byteorder == 'little')
    ucs2_enc = 'utf-16le' if little_endian else 'utf-16be'
    ucs4_enc = 'utf-32le' if little_endian else 'utf-32be'

    ucs2_text = 'ucs2:\u20ac '
    ucs4_text = 'ucs4:\U0010ffff '

    # (input string, expected exported bytes, expected native format)
    expectations = (
        ("abc", b'abc', PyUnicode_NATIVE_ASCII),
        ("latin1:\xe9 ", b'latin1:\xe9 ', PyUnicode_NATIVE_UCS1),
        (ucs2_text, ucs2_text.encode(ucs2_enc), PyUnicode_NATIVE_UCS2),
        (ucs4_text, ucs4_text.encode(ucs4_enc), PyUnicode_NATIVE_UCS4),
    )
    for text, data, native_format in expectations:
        self.assertEqual(asnativeformat(text), (data, native_format))
1703+
def test_unicode_fromnativeformat(self):
    # Test PyUnicode_FromNativeFormat()
    fromnativeformat = _testlimitedcapi.unicode_fromnativeformat

    # UCS2/UCS4 input buffers are built with the host byte order.
    little_endian = (sys.byteorder == 'little')
    ucs2_enc = 'utf-16le' if little_endian else 'utf-16be'
    ucs4_enc = 'utf-32le' if little_endian else 'utf-32be'

    ucs2_text = 'ucs2:\u20ac '
    ucs4_text = 'ucs4:\U0010ffff '
    utf8_text = "abc\xe9 \U0010ffff "
    ucs2 = ucs2_text.encode(ucs2_enc)
    ucs4 = ucs4_text.encode(ucs4_enc)

    # Well-formed buffers: (input bytes, native format, expected string)
    decode_cases = (
        (b'abc', PyUnicode_NATIVE_ASCII, "abc"),
        (b'latin1:\xe9 ', PyUnicode_NATIVE_UCS1, "latin1:\xe9 "),
        (ucs2, PyUnicode_NATIVE_UCS2, ucs2_text),
        (ucs4, PyUnicode_NATIVE_UCS4, ucs4_text),
        (utf8_text.encode('utf8'), PyUnicode_NATIVE_UTF8, utf8_text),
    )
    for data, native_format, expected in decode_cases:
        self.assertEqual(fromnativeformat(data, native_format), expected)

    # Empty string
    valid_formats = (
        PyUnicode_NATIVE_ASCII,
        PyUnicode_NATIVE_UCS1,
        PyUnicode_NATIVE_UCS2,
        PyUnicode_NATIVE_UCS4,
        PyUnicode_NATIVE_UTF8,
    )
    for native_format in valid_formats:
        with self.subTest(native_format=native_format):
            self.assertEqual(fromnativeformat(b'', native_format), '')

    # Invalid format
    self.assertRaises(ValueError,
                      fromnativeformat, b'', PyUnicode_NATIVE_INVALID)

    # Invalid size: buffer length is not a multiple of the code unit size
    truncated_cases = (
        (ucs2[:-1], PyUnicode_NATIVE_UCS2),
        (ucs4[:-1], PyUnicode_NATIVE_UCS4),
        (ucs4[:-2], PyUnicode_NATIVE_UCS4),
        (ucs4[:-3], PyUnicode_NATIVE_UCS4),
    )
    for data, native_format in truncated_cases:
        self.assertRaises(ValueError, fromnativeformat, data, native_format)
1754+
1755+
# Run the test suite via unittest when this file is executed as a script.
1756+ if __name__ == '__main__' :
16801757 unittest .main ()
0 commit comments