@@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str,
766
766
configured. */
767
767
PyStatus
768
768
PyConfig_SetBytesString (PyConfig * config , wchar_t * * config_str ,
769
- const char * str )
769
+ const char * str )
770
770
{
771
771
return CONFIG_SET_BYTES_STR (config , config_str , str , "string" );
772
772
}
@@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config)
1466
1466
1467
1467
1468
1468
static const wchar_t *
1469
- config_get_stdio_errors (void )
1469
+ config_get_stdio_errors (const PyPreConfig * preconfig )
1470
1470
{
1471
+ if (preconfig -> utf8_mode ) {
1472
+ /* UTF-8 Mode uses UTF-8/surrogateescape */
1473
+ return L"surrogateescape" ;
1474
+ }
1475
+
1471
1476
#ifndef MS_WINDOWS
1472
1477
const char * loc = setlocale (LC_CTYPE , NULL );
1473
1478
if (loc != NULL ) {
@@ -1492,26 +1497,41 @@ config_get_stdio_errors(void)
1492
1497
}
1493
1498
1494
1499
1500
+ // See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
1495
1501
static PyStatus
1496
- config_get_locale_encoding (PyConfig * config , wchar_t * * locale_encoding )
1502
+ config_get_locale_encoding (PyConfig * config , const PyPreConfig * preconfig ,
1503
+ wchar_t * * locale_encoding )
1497
1504
{
1505
+ #ifdef _Py_FORCE_UTF8_LOCALE
1506
+ return PyConfig_SetString (config , locale_encoding , L"utf-8" );
1507
+ #else
1508
+ if (preconfig -> utf8_mode ) {
1509
+ return PyConfig_SetString (config , locale_encoding , L"utf-8" );
1510
+ }
1511
+
1498
1512
#ifdef MS_WINDOWS
1499
1513
char encoding [20 ];
1500
1514
PyOS_snprintf (encoding , sizeof (encoding ), "cp%u" , GetACP ());
1501
1515
return PyConfig_SetBytesString (config , locale_encoding , encoding );
1502
- #elif defined(_Py_FORCE_UTF8_LOCALE )
1503
- return PyConfig_SetString (config , locale_encoding , L"utf-8" );
1504
1516
#else
1505
1517
const char * encoding = nl_langinfo (CODESET );
1506
1518
if (!encoding || encoding [0 ] == '\0' ) {
1519
+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
1520
+ // nl_langinfo() can return an empty string when the LC_CTYPE locale is
1521
+ // not supported. Default to UTF-8 in that case, because UTF-8 is the
1522
+ // default charset on macOS.
1523
+ encoding = "UTF-8" ;
1524
+ #else
1507
1525
return _PyStatus_ERR ("failed to get the locale encoding: "
1508
- "nl_langinfo(CODESET) failed" );
1526
+ "nl_langinfo(CODESET) returns an empty string" );
1527
+ #endif
1509
1528
}
1510
1529
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
1511
1530
return CONFIG_SET_BYTES_STR (config ,
1512
1531
locale_encoding , encoding ,
1513
1532
"nl_langinfo(CODESET)" );
1514
- #endif
1533
+ #endif // !MS_WINDOWS
1534
+ #endif // !_Py_FORCE_UTF8_LOCALE
1515
1535
}
1516
1536
1517
1537
@@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config,
1596
1616
PyMem_RawFree (pythonioencoding );
1597
1617
}
1598
1618
1599
- /* UTF-8 Mode uses UTF-8/surrogateescape */
1600
- if (preconfig -> utf8_mode ) {
1601
- if (config -> stdio_encoding == NULL ) {
1602
- status = PyConfig_SetString (config , & config -> stdio_encoding ,
1603
- L"utf-8" );
1604
- if (_PyStatus_EXCEPTION (status )) {
1605
- return status ;
1606
- }
1607
- }
1608
- if (config -> stdio_errors == NULL ) {
1609
- status = PyConfig_SetString (config , & config -> stdio_errors ,
1610
- L"surrogateescape" );
1611
- if (_PyStatus_EXCEPTION (status )) {
1612
- return status ;
1613
- }
1614
- }
1615
- }
1616
-
1617
1619
/* Choose the default error handler based on the current locale. */
1618
1620
if (config -> stdio_encoding == NULL ) {
1619
- status = config_get_locale_encoding (config , & config -> stdio_encoding );
1621
+ status = config_get_locale_encoding (config , preconfig ,
1622
+ & config -> stdio_encoding );
1620
1623
if (_PyStatus_EXCEPTION (status )) {
1621
1624
return status ;
1622
1625
}
1623
1626
}
1624
1627
if (config -> stdio_errors == NULL ) {
1625
- const wchar_t * errors = config_get_stdio_errors ();
1628
+ const wchar_t * errors = config_get_stdio_errors (preconfig );
1626
1629
assert (errors != NULL );
1627
1630
1628
1631
status = PyConfig_SetString (config , & config -> stdio_errors , errors );
@@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config,
1635
1638
}
1636
1639
1637
1640
1641
+ // See also config_get_locale_encoding()
1642
+ static PyStatus
1643
+ config_get_fs_encoding (PyConfig * config , const PyPreConfig * preconfig ,
1644
+ wchar_t * * fs_encoding )
1645
+ {
1646
+ #ifdef _Py_FORCE_UTF8_FS_ENCODING
1647
+ return PyConfig_SetString (config , fs_encoding , L"utf-8" );
1648
+ #elif defined(MS_WINDOWS )
1649
+ const wchar_t * encoding ;
1650
+ if (preconfig -> legacy_windows_fs_encoding ) {
1651
+ // Legacy Windows filesystem encoding: mbcs/replace
1652
+ encoding = L"mbcs" ;
1653
+ }
1654
+ else {
1655
+ // Windows defaults to utf-8/surrogatepass (PEP 529)
1656
+ encoding = L"utf-8" ;
1657
+ }
1658
+ return PyConfig_SetString (config , fs_encoding , encoding );
1659
+ #else // !MS_WINDOWS
1660
+ if (preconfig -> utf8_mode ) {
1661
+ return PyConfig_SetString (config , fs_encoding , L"utf-8" );
1662
+ }
1663
+ else if (_Py_GetForceASCII ()) {
1664
+ return PyConfig_SetString (config , fs_encoding , L"ascii" );
1665
+ }
1666
+ else {
1667
+ return config_get_locale_encoding (config , preconfig , fs_encoding );
1668
+ }
1669
+ #endif // !MS_WINDOWS
1670
+ }
1671
+
1672
+
1638
1673
static PyStatus
1639
1674
config_init_fs_encoding (PyConfig * config , const PyPreConfig * preconfig )
1640
1675
{
1641
1676
PyStatus status ;
1642
1677
1643
1678
if (config -> filesystem_encoding == NULL ) {
1644
- #ifdef _Py_FORCE_UTF8_FS_ENCODING
1645
- status = PyConfig_SetString (config , & config -> filesystem_encoding , L"utf-8" );
1646
- #else
1647
-
1648
- #ifdef MS_WINDOWS
1649
- if (preconfig -> legacy_windows_fs_encoding ) {
1650
- /* Legacy Windows filesystem encoding: mbcs/replace */
1651
- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1652
- L"mbcs" );
1653
- }
1654
- else
1655
- #endif
1656
- if (preconfig -> utf8_mode ) {
1657
- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1658
- L"utf-8" );
1659
- }
1660
- #ifndef MS_WINDOWS
1661
- else if (_Py_GetForceASCII ()) {
1662
- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1663
- L"ascii" );
1664
- }
1665
- #endif
1666
- else {
1667
- #ifdef MS_WINDOWS
1668
- /* Windows defaults to utf-8/surrogatepass (PEP 529). */
1669
- status = PyConfig_SetString (config , & config -> filesystem_encoding ,
1670
- L"utf-8" );
1671
- #else
1672
- status = config_get_locale_encoding (config ,
1673
- & config -> filesystem_encoding );
1674
- #endif
1675
- }
1676
- #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
1677
-
1679
+ status = config_get_fs_encoding (config , preconfig ,
1680
+ & config -> filesystem_encoding );
1678
1681
if (_PyStatus_EXCEPTION (status )) {
1679
1682
return status ;
1680
1683
}
0 commit comments