From 1cba2e9db1febdd53a4b8dee2e9b592ea18ea942 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 1 Jul 2022 16:46:13 +0200 Subject: [PATCH 1/8] Fixed `tm.set_locale` context manager, it could error and leak when category LC_ALL was used. Fixed #46595 --- pandas/_config/localization.py | 3 ++- pandas/tests/config/test_localization.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index fa5503029fd4b..9e815920c760a 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -39,7 +39,8 @@ def set_locale( particular locale, without globally setting the locale. This probably isn't thread-safe. """ - current_locale = locale.getlocale() + # getlocale is wrong, see GH#46595 + current_locale = locale.setlocale(lc_var) try: locale.setlocale(lc_var, new_locale) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 21b1b7ed6ee65..3cc03ecc51f86 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -15,7 +15,7 @@ import pandas as pd _all_locales = get_locales() or [] -_current_locale = locale.getlocale() +_current_locale = locale.setlocale(locale.LC_ALL) # getlocale() is wrong, see GH#46595 # Don't run any of these tests if we are on Windows or have no locales. pytestmark = pytest.mark.skipif( @@ -95,7 +95,8 @@ def test_set_locale(lang, enc): assert normalized_locale == new_locale # Once we exit the "with" statement, locale should be back to what it was. - current_locale = locale.getlocale() + # current_locale = locale.getlocale() is wrong, see GH#46595 + current_locale = locale.setlocale(locale.LC_ALL) assert current_locale == _current_locale From 149556f990d251dd03a50ae795dc1f939f2e02c4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 6 Jul 2022 09:25:06 +0200 Subject: [PATCH 2/8] Marked `xfail`ing tests that now correctly run with the appropriate locale on it_IT and zh_CN runners, as `strict=False`. --- pandas/tests/tools/test_to_datetime.py | 3 +++ pandas/tests/tools/test_to_time.py | 1 + 2 files changed, 4 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index f712b4a24e5e5..3c053df740232 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -291,6 +291,7 @@ def test_to_datetime_format_microsecond(self, cache): marks=pytest.mark.xfail( locale.getlocale()[0] in ("zh_CN", "it_IT"), reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + strict=False, ), ), pytest.param( @@ -300,6 +301,7 @@ def test_to_datetime_format_microsecond(self, cache): marks=pytest.mark.xfail( locale.getlocale()[0] in ("zh_CN", "it_IT"), reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + strict=False, ), ), pytest.param( @@ -309,6 +311,7 @@ def test_to_datetime_format_microsecond(self, cache): marks=pytest.mark.xfail( locale.getlocale()[0] in ("zh_CN", "it_IT"), reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + strict=False, ), ), ], diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index 7983944d4384d..c4751137fbeeb 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -12,6 +12,7 @@ fails_on_non_english = pytest.mark.xfail( locale.getlocale()[0] in ("zh_CN", "it_IT"), reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8", + strict=False, ) From f692b5f0afd50a04f48a46b1fcd21ad337e3a040 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 6 Jul 2022 10:04:25 +0200 Subject: [PATCH 3/8] Added a `get_current_locale` function, compliant with `setlocale`, to avoid misuses of `getlocale` (see #46595). Added tests. --- pandas/_config/localization.py | 27 ++++++++++- pandas/tests/config/test_localization.py | 59 ++++++++++++++++++++---- 2 files changed, 74 insertions(+), 12 deletions(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 9e815920c760a..2ad9c29721213 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -17,6 +17,29 @@ from pandas._config.config import options +def get_current_locale(lc_var: int = locale.LC_ALL): + """ + Return the current locale associated with category ``lc_var``. + This is a convenience method to avoid misuses of ``getlocale``. Indeed the + result returned by ``getlocale`` can not always be used to set back the locale + using ``setlocale``. See GH#46595 + + Parameters + ---------- + lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Returns + ------- + + """ + # `getlocale` does not always play well with `setLocale`, avoid using it + # return locale.getlocale() + + # Using `setlocale` with no second argument returns the current locale + return locale.setlocale(lc_var) + + @contextmanager def set_locale( new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL @@ -39,8 +62,8 @@ def set_locale( particular locale, without globally setting the locale. This probably isn't thread-safe. """ - # getlocale is wrong, see GH#46595 - current_locale = locale.setlocale(lc_var) + # getlocale is not always compliant with setlocale, see GH#46595 + current_locale = get_current_locale(lc_var=lc_var) try: locale.setlocale(lc_var, new_locale) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 3cc03ecc51f86..daadf9f5066c7 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -6,35 +6,74 @@ from pandas._config.localization import ( can_set_locale, + get_current_locale, get_locales, set_locale, ) -from pandas.compat import is_platform_windows - import pandas as pd _all_locales = get_locales() or [] _current_locale = locale.setlocale(locale.LC_ALL) # getlocale() is wrong, see GH#46595 -# Don't run any of these tests if we are on Windows or have no locales. -pytestmark = pytest.mark.skipif( - is_platform_windows() or not _all_locales, reason="Need non-Windows and locales" -) +# Don't run any of these tests if we have no locales. +pytestmark = pytest.mark.skipif(not _all_locales, reason="Need locales") _skip_if_only_one_locale = pytest.mark.skipif( len(_all_locales) <= 1, reason="Need multiple locales for meaningful test" ) -def test_can_set_locale_valid_set(): +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_get_current_locale(lc_var): + # Make sure that `get_current_locale` uses setlocale + before_locale = get_current_locale(lc_var) + assert before_locale == locale.setlocale(lc_var) + + +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_current_locale(lc_var): + # Can set the current locale + before_locale = get_current_locale(lc_var) + assert can_set_locale(before_locale, lc_var=lc_var) + after_locale = get_current_locale(lc_var) + assert before_locale == after_locale + + +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_locale_valid_set(lc_var): # Can set the default locale. - assert can_set_locale("") + before_locale = get_current_locale(lc_var) + assert can_set_locale("", lc_var=lc_var) + after_locale = get_current_locale(lc_var) + assert before_locale == after_locale -def test_can_set_locale_invalid_set(): +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_locale_invalid_set(lc_var): # Cannot set an invalid locale. - assert not can_set_locale("non-existent_locale") + before_locale = get_current_locale(lc_var) + assert not can_set_locale("non-existent_locale", lc_var=lc_var) + after_locale = get_current_locale(lc_var) + assert before_locale == after_locale + + +@pytest.mark.parametrize( + "lang,enc", + [ + ("it_CH", "UTF-8"), + ("en_US", "ascii"), + ("zh_CN", "GB2312"), + ("it_IT", "ISO-8859-1"), + ], +) +@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) +def test_can_set_locale_no_leak(lang, enc, lc_var): + # Test that can_set_locale does not leak even when returning False. See GH#46595 + before_locale = get_current_locale(lc_var) + can_set_locale((lang, enc), locale.LC_ALL) + after_locale = get_current_locale(lc_var) + assert before_locale == after_locale def test_can_set_locale_invalid_get(monkeypatch): From a651167ba85db00990310d7fe9ae5179defa994f Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 6 Jul 2022 10:15:27 +0200 Subject: [PATCH 4/8] Added `get_current_locale` to the testing module for convenience --- pandas/_testing/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0fcea111716ec..a3054be2ca175 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -21,6 +21,7 @@ from pandas._config.localization import ( # noqa:F401 can_set_locale, + get_current_locale, get_locales, set_locale, ) From d1f9cf921de03b344c31f892a8abd872ae845120 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 6 Jul 2022 16:34:54 +0200 Subject: [PATCH 5/8] Minor improvement in `test_set_locale` --- pandas/tests/config/test_localization.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index daadf9f5066c7..031491cc3fdab 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -111,10 +111,7 @@ def test_get_locales_prefix(): ], ) def test_set_locale(lang, enc): - if all(x is None for x in _current_locale): - # Not sure why, but on some Travis runs with pytest, - # getlocale() returned (None, None). - pytest.skip("Current locale is not set.") + before_locale = get_current_locale() enc = codecs.lookup(enc).name new_locale = lang, enc @@ -134,9 +131,8 @@ def test_set_locale(lang, enc): assert normalized_locale == new_locale # Once we exit the "with" statement, locale should be back to what it was. - # current_locale = locale.getlocale() is wrong, see GH#46595 - current_locale = locale.setlocale(locale.LC_ALL) - assert current_locale == _current_locale + after_locale = get_current_locale() + assert before_locale == after_locale def test_encoding_detected(): From b53796bbe76155294b2ca56ca1c84f92ce9a096c Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Thu, 7 Jul 2022 12:12:11 +0200 Subject: [PATCH 6/8] Code review: improved `get_current_locale` --- pandas/_config/localization.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 2ad9c29721213..ad2b298c156cf 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -17,7 +17,7 @@ from pandas._config.config import options -def get_current_locale(lc_var: int = locale.LC_ALL): +def get_current_locale(lc_var: int = locale.LC_ALL) -> str: """ Return the current locale associated with category ``lc_var``. This is a convenience method to avoid misuses of ``getlocale``. Indeed the @@ -31,11 +31,10 @@ def get_current_locale(lc_var: int = locale.LC_ALL): Returns ------- - + locale_str : str + A string representing the current locale for category ``lc_var``, that + can be used safely with ``setlocale`` for the same category. """ - # `getlocale` does not always play well with `setLocale`, avoid using it - # return locale.getlocale() - # Using `setlocale` with no second argument returns the current locale return locale.setlocale(lc_var) From 6b863c327f433466510fe569e23de2729d3a1e2f Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 8 Jul 2022 18:30:49 +0200 Subject: [PATCH 7/8] Code review: removed `get_current_locale` --- pandas/_config/localization.py | 26 ++------------------- pandas/_testing/__init__.py | 1 - pandas/tests/config/test_localization.py | 29 +++++++++++------------- 3 files changed, 15 insertions(+), 41 deletions(-) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index ad2b298c156cf..c4355e954c67c 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -17,28 +17,6 @@ from pandas._config.config import options -def get_current_locale(lc_var: int = locale.LC_ALL) -> str: - """ - Return the current locale associated with category ``lc_var``. - This is a convenience method to avoid misuses of ``getlocale``. Indeed the - result returned by ``getlocale`` can not always be used to set back the locale - using ``setlocale``. See GH#46595 - - Parameters - ---------- - lc_var : int, default `locale.LC_ALL` - The category of the locale being set. - - Returns - ------- - locale_str : str - A string representing the current locale for category ``lc_var``, that - can be used safely with ``setlocale`` for the same category. - """ - # Using `setlocale` with no second argument returns the current locale - return locale.setlocale(lc_var) - - @contextmanager def set_locale( new_locale: str | tuple[str, str], lc_var: int = locale.LC_ALL @@ -61,8 +39,8 @@ def set_locale( particular locale, without globally setting the locale. This probably isn't thread-safe. """ - # getlocale is not always compliant with setlocale, see GH#46595 - current_locale = get_current_locale(lc_var=lc_var) + # getlocale is not always compliant with setlocale, use setlocale. GH#46595 + current_locale = locale.setlocale(lc_var) try: locale.setlocale(lc_var, new_locale) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index a3054be2ca175..0fcea111716ec 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -21,7 +21,6 @@ from pandas._config.localization import ( # noqa:F401 can_set_locale, - get_current_locale, get_locales, set_locale, ) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 031491cc3fdab..f972a9ee3b497 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -6,7 +6,6 @@ from pandas._config.localization import ( can_set_locale, - get_current_locale, get_locales, set_locale, ) @@ -24,37 +23,35 @@ ) -@pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) -def test_get_current_locale(lc_var): - # Make sure that `get_current_locale` uses setlocale - before_locale = get_current_locale(lc_var) - assert before_locale == locale.setlocale(lc_var) +def _get_current_locale(lc_var: int = locale.LC_ALL) -> str: + # getlocale is not always compliant with setlocale, use setlocale. GH#46595 + return locale.setlocale(lc_var) @pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) def test_can_set_current_locale(lc_var): # Can set the current locale - before_locale = get_current_locale(lc_var) + before_locale = _get_current_locale(lc_var) assert can_set_locale(before_locale, lc_var=lc_var) - after_locale = get_current_locale(lc_var) + after_locale = _get_current_locale(lc_var) assert before_locale == after_locale @pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) def test_can_set_locale_valid_set(lc_var): # Can set the default locale. - before_locale = get_current_locale(lc_var) + before_locale = _get_current_locale(lc_var) assert can_set_locale("", lc_var=lc_var) - after_locale = get_current_locale(lc_var) + after_locale = _get_current_locale(lc_var) assert before_locale == after_locale @pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) def test_can_set_locale_invalid_set(lc_var): # Cannot set an invalid locale. - before_locale = get_current_locale(lc_var) + before_locale = _get_current_locale(lc_var) assert not can_set_locale("non-existent_locale", lc_var=lc_var) - after_locale = get_current_locale(lc_var) + after_locale = _get_current_locale(lc_var) assert before_locale == after_locale @@ -70,9 +67,9 @@ def test_can_set_locale_invalid_set(lc_var): @pytest.mark.parametrize("lc_var", (locale.LC_ALL, locale.LC_CTYPE, locale.LC_TIME)) def test_can_set_locale_no_leak(lang, enc, lc_var): # Test that can_set_locale does not leak even when returning False. See GH#46595 - before_locale = get_current_locale(lc_var) + before_locale = _get_current_locale(lc_var) can_set_locale((lang, enc), locale.LC_ALL) - after_locale = get_current_locale(lc_var) + after_locale = _get_current_locale(lc_var) assert before_locale == after_locale @@ -111,7 +108,7 @@ def test_get_locales_prefix(): ], ) def test_set_locale(lang, enc): - before_locale = get_current_locale() + before_locale = _get_current_locale() enc = codecs.lookup(enc).name new_locale = lang, enc @@ -131,7 +128,7 @@ def test_set_locale(lang, enc): assert normalized_locale == new_locale # Once we exit the "with" statement, locale should be back to what it was. - after_locale = get_current_locale() + after_locale = _get_current_locale() assert before_locale == after_locale From 38b7e263b9ea9bcab487c1bff25e389d1c1828d2 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Fri, 8 Jul 2022 18:34:56 +0200 Subject: [PATCH 8/8] Code review: comment --- pandas/tests/tools/test_to_datetime.py | 2 ++ pandas/tests/tools/test_to_time.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 3c053df740232..afa06bf1a79af 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -284,6 +284,8 @@ def test_to_datetime_format_microsecond(self, cache): "%m/%d/%Y %H:%M:%S", Timestamp("2010-01-10 13:56:01"), ], + # The 3 tests below are locale-dependent. + # They pass, except when the machine locale is zh_CN or it_IT . pytest.param( "01/10/2010 08:14 PM", "%m/%d/%Y %I:%M %p", diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index c4751137fbeeb..a8316e0f3970c 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -9,6 +9,8 @@ from pandas.core.tools.datetimes import to_time as to_time_alias from pandas.core.tools.times import to_time +# The tests marked with this are locale-dependent. +# They pass, except when the machine locale is zh_CN or it_IT. fails_on_non_english = pytest.mark.xfail( locale.getlocale()[0] in ("zh_CN", "it_IT"), reason="fail on a CI build with LC_ALL=zh_CN.utf8/it_IT.utf8",