From 1305285c848455613e7479b92e4815d4e36bf4b4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 3 Oct 2024 20:46:04 +0300 Subject: [PATCH 01/16] gh-53203: Fix strptime(..,'%c') on locales with short month names In some locales (for example French and Hebrew), the default month used in __calc_date_time has the same name in full and abbreviated form. So the code failed to correctly distinguish formats %b and %B. Co-authored-by: Eli Bendersky --- Lib/_strptime.py | 32 +++++- Lib/test/support/__init__.py | 44 +++++++- Lib/test/test_strptime.py | 103 +++++++++++------- ...4-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 2 + 4 files changed, 137 insertions(+), 44 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index a3f8bb544d518d..b26ff97121e85d 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -119,13 +119,19 @@ def __calc_date_time(self): date_time[1] = time.strftime("%x", time_tuple).lower() date_time[2] = time.strftime("%X", time_tuple).lower() replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), + (self.a_weekday[2], '%a'), + (self.am_pm[1], '%p'), ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I')] + # The month format is treated specially because of a possible + # ambiguity in some locales where the full and abbreviated + # month names are equal. See doc of __find_month_format for more + # details. + # + month_format = self.__find_month_format() replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone for tz in tz_values]) for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): @@ -137,6 +143,8 @@ def __calc_date_time(self): # strings (e.g., MacOS 9 having timezone as ('','')). 
if old: current_format = current_format.replace(old, new) + for month_str in (self.f_month[3], self.a_month[3]): + current_format = current_format.replace(month_str, month_format) # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. @@ -150,6 +158,26 @@ def __calc_date_time(self): self.LC_date = date_time[1] self.LC_time = date_time[2] + def __find_month_format(self): + """Find the month format appropriate for the current locale. + + In some locales (for example French and Hebrew), the default month + used in __calc_date_time has the same name in full and abbreviated + form. Thus, cycle months of the year until a month is found where + these representations differ, and check the datetime string created + by strftime against this month, to make sure we select the correct + format specifier. + """ + for m in range(1, 13): + if self.f_month[m] != self.a_month[m]: + time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) + datetime = time.strftime('%c', time_tuple).lower() + if datetime.find(self.f_month[m]) >= 0: + return '%B' + elif datetime.find(self.a_month[m]) >= 0: + return '%b' + return '%B' + def __calc_timezone(self): # Set self.timezone by using time.tzname. # Do not worry about possibility of time.tzname[0] == time.tzname[1] diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 1a44cc638b5714..7e7b3fea289b5f 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -930,8 +930,8 @@ def check_sizeof(test, o, size): test.assertEqual(result, size, msg) #======================================================================= -# Decorator for running a function in a different locale, correctly resetting -# it afterwards. +# Decorator/context manager for running a code in a different locale, +# correctly resetting it afterwards. 
@contextlib.contextmanager def run_with_locale(catstr, *locales): @@ -959,6 +959,46 @@ def run_with_locale(catstr, *locales): if locale and orig_locale: locale.setlocale(category, orig_locale) +#======================================================================= +# Decorator for running a function in multiple locales (if they are +# available) and resetting the original locale afterwards. + +def run_with_locales(catstr, *locales): + def deco(func): + @functools.wraps(func) + def wrapper(self, /, *args, **kwargs): + dry_run = True + try: + import locale + category = getattr(locale, catstr) + orig_locale = locale.setlocale(category) + except AttributeError: + # if the test author gives us an invalid category string + raise + except: + # cannot retrieve original locale, so do nothing + pass + else: + try: + for loc in locales: + with self.subTest(locale=loc): + try: + locale.setlocale(category, loc) + except: + self.skipTest(f'no locale {loc!r}') + else: + dry_run = False + func(self, *args, **kwargs) + finally: + locale.setlocale(category, orig_locale) + if dry_run: + # no locales available, so just run the test + # with the current locale + with self.subTest(locale=None): + func(self, *args, **kwargs) + return wrapper + return deco + #======================================================================= # Decorator for running a function in a specific timezone, correctly # resetting it afterwards. 
diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 038746e26c24ad..bdcd31639f7532 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -7,7 +7,8 @@ import os import sys from test import support -from test.support import skip_if_buggy_ucrt_strfptime, warnings_helper +from test.support import warnings_helper +from test.support import skip_if_buggy_ucrt_strfptime, run_with_locales from datetime import date as datetime_date import _strptime @@ -289,54 +290,64 @@ def test_unconverteddata(self): # Check ValueError is raised when there is unconverted data self.assertRaises(ValueError, _strptime._strptime_time, "10 12", "%m") - def helper(self, directive, position): + def roundtrip(self, fmt, position, time_tuple=None): """Helper fxn in testing.""" - fmt = "%d %Y" if directive == 'd' else "%" + directive - strf_output = time.strftime(fmt, self.time_tuple) + if time_tuple is None: + time_tuple = self.time_tuple + strf_output = time.strftime(fmt, time_tuple) strp_output = _strptime._strptime_time(strf_output, fmt) - self.assertTrue(strp_output[position] == self.time_tuple[position], - "testing of '%s' directive failed; '%s' -> %s != %s" % - (directive, strf_output, strp_output[position], - self.time_tuple[position])) + self.assertEqual(strp_output[position], time_tuple[position], + "testing of %r format failed; %r -> %r != %r" % + (fmt, strf_output, strp_output[position], + time_tuple[position])) + if support.verbose >= 3: + print("testing of %r format: %r -> %r" % + (fmt, strf_output, strp_output[position])) def test_year(self): # Test that the year is handled properly - for directive in ('y', 'Y'): - self.helper(directive, 0) + self.roundtrip('%Y', 0) + self.roundtrip('%y', 0) + self.roundtrip('%Y', 0, (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + # Must also make sure %y values are correct for bounds set by Open Group - for century, bounds in ((1900, ('69', '99')), (2000, ('00', '68'))): - for bound in bounds: - strp_output = 
_strptime._strptime_time(bound, '%y') - expected_result = century + int(bound) - self.assertTrue(strp_output[0] == expected_result, - "'y' test failed; passed in '%s' " - "and returned '%s'" % (bound, strp_output[0])) + strptime = _strptime._strptime_time + self.assertEqual(strptime('00', '%y')[0], 2000) + self.assertEqual(strptime('68', '%y')[0], 2068) + self.assertEqual(strptime('69', '%y')[0], 1969) + self.assertEqual(strptime('99', '%y')[0], 1999) def test_month(self): # Test for month directives - for directive in ('B', 'b', 'm'): - self.helper(directive, 1) + self.roundtrip('%m', 1) + + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') + def test_month_locale(self): + # Test for month directives + self.roundtrip('%B', 1) + self.roundtrip('%b', 1) def test_day(self): # Test for day directives - self.helper('d', 2) + self.roundtrip('%d %Y', 2) def test_hour(self): # Test hour directives - self.helper('H', 3) - strf_output = time.strftime("%I %p", self.time_tuple) - strp_output = _strptime._strptime_time(strf_output, "%I %p") - self.assertTrue(strp_output[3] == self.time_tuple[3], - "testing of '%%I %%p' directive failed; '%s' -> %s != %s" % - (strf_output, strp_output[3], self.time_tuple[3])) + self.roundtrip('%H', 3) + + # NB: Only works on locales with AM/PM + @run_with_locales('LC_TIME', 'en_US', 'ja_JP') + def test_hour_locale(self): + # Test hour directives + self.roundtrip('%I %p', 3) def test_minute(self): # Test minute directives - self.helper('M', 4) + self.roundtrip('%M', 4) def test_second(self): # Test second directives - self.helper('S', 5) + self.roundtrip('%S', 5) def test_fraction(self): # Test microseconds @@ -347,12 +358,18 @@ def test_fraction(self): def test_weekday(self): # Test weekday directives - for directive in ('A', 'a', 'w', 'u'): - self.helper(directive,6) + self.roundtrip('%w', 6) + self.roundtrip('%u', 6) + + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', '') + def 
test_weekday_locale(self): + # Test weekday directives + self.roundtrip('%A', 6) + self.roundtrip('%a', 6) def test_julian(self): # Test julian directives - self.helper('j', 7) + self.roundtrip('%j', 7) def test_offset(self): one_hour = 60 * 60 @@ -449,20 +466,26 @@ def test_bad_timezone(self): "time.daylight set to %s and passing in %s" % (time.tzname, tz_value, time.daylight, tz_name)) - def test_date_time(self): + # NB: Does not roundtrip on some locales like hif_FJ. + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') + def test_date_time_locale(self): # Test %c directive - for position in range(6): - self.helper('c', position) + self.roundtrip('%c', slice(0, 6)) + self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) - def test_date(self): + # NB: Dates before 1969 do not work on locales: C, POSIX, + # az_IR, fa_IR, sd_PK, uk_UA. + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + def test_date_locale(self): # Test %x directive - for position in range(0,3): - self.helper('x', position) + self.roundtrip('%x', slice(0, 3)) + self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) - def test_time(self): + # NB: Does not distinguish AM/PM time on a number of locales. + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + def test_time_locale(self): # Test %X directive - for position in range(3,6): - self.helper('X', position) + self.roundtrip('%X', slice(3, 6)) def test_percent(self): # Make sure % signs are handled properly diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst new file mode 100644 index 00000000000000..be828b004bb20a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -0,0 +1,2 @@ +Fix :func:`time.strptime` for ``%c`` format in locales with a short March +month name, such as French or Hebrew. 
From dafd06c8ce1ce4a9777d457f66c26512f3604fa9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 4 Oct 2024 09:56:30 +0300 Subject: [PATCH 02/16] Do not run tests on default locale if it does not work on all locales. --- Lib/test/support/__init__.py | 10 +++++----- Lib/test/test_strptime.py | 16 +++++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 7e7b3fea289b5f..c7e5933cc75cf0 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -942,7 +942,7 @@ def run_with_locale(catstr, *locales): except AttributeError: # if the test author gives us an invalid category string raise - except: + except Exception: # cannot retrieve original locale, so do nothing locale = orig_locale = None else: @@ -950,7 +950,7 @@ def run_with_locale(catstr, *locales): try: locale.setlocale(category, loc) break - except: + except locale.Error: pass try: @@ -967,7 +967,7 @@ def run_with_locales(catstr, *locales): def deco(func): @functools.wraps(func) def wrapper(self, /, *args, **kwargs): - dry_run = True + dry_run = '' in locales try: import locale category = getattr(locale, catstr) @@ -975,7 +975,7 @@ def wrapper(self, /, *args, **kwargs): except AttributeError: # if the test author gives us an invalid category string raise - except: + except Exception: # cannot retrieve original locale, so do nothing pass else: @@ -984,7 +984,7 @@ def wrapper(self, /, *args, **kwargs): with self.subTest(locale=loc): try: locale.setlocale(category, loc) - except: + except locale.Error: self.skipTest(f'no locale {loc!r}') else: dry_run = False diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index bdcd31639f7532..89d73df8d7e96f 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -321,11 +321,14 @@ def test_month(self): # Test for month directives self.roundtrip('%m', 1) - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 
'') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') def test_month_locale(self): # Test for month directives self.roundtrip('%B', 1) self.roundtrip('%b', 1) + for m in range(1, 13): + self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) + self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) def test_day(self): # Test for day directives @@ -336,7 +339,7 @@ def test_hour(self): self.roundtrip('%H', 3) # NB: Only works on locales with AM/PM - @run_with_locales('LC_TIME', 'en_US', 'ja_JP') + @run_with_locales('LC_TIME', 'C', 'en_US', 'ja_JP') def test_hour_locale(self): # Test hour directives self.roundtrip('%I %p', 3) @@ -361,7 +364,7 @@ def test_weekday(self): self.roundtrip('%w', 6) self.roundtrip('%u', 6) - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', '') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', '') def test_weekday_locale(self): # Test weekday directives self.roundtrip('%A', 6) @@ -467,14 +470,13 @@ def test_bad_timezone(self): (time.tzname, tz_value, time.daylight, tz_name)) # NB: Does not roundtrip on some locales like hif_FJ. - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') def test_date_time_locale(self): # Test %c directive self.roundtrip('%c', slice(0, 6)) self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) - # NB: Dates before 1969 do not work on locales: C, POSIX, - # az_IR, fa_IR, sd_PK, uk_UA. + # NB: Dates before 1969 do not work on a number of locales, including C. @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') def test_date_locale(self): # Test %x directive @@ -482,7 +484,7 @@ def test_date_locale(self): self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Does not distinguish AM/PM time on a number of locales. 
- @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') def test_time_locale(self): # Test %X directive self.roundtrip('%X', slice(3, 6)) From b5bace6bc5c05d511d7d04d1e887d9f0c7a1fee9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 4 Oct 2024 13:29:33 +0300 Subject: [PATCH 03/16] Skip some tests on Emscripten. --- Lib/test/test_strptime.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 89d73df8d7e96f..0d4404cd627a7a 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -477,10 +477,18 @@ def test_date_time_locale(self): self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Dates before 1969 do not work on a number of locales, including C. - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') def test_date_locale(self): # Test %x directive self.roundtrip('%x', slice(0, 3)) + + # NB: Dates before 1969 do not work on a number of locales, including C. + @unittest.skipIf( + support.is_emscripten, "musl libc issue on Emscripten, bpo-46390" + ) + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + def test_date_locale2(self): + # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Does not distinguish AM/PM time on a number of locales. From b0d1fb56669da0da0568afe328817b553ac2ee03 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 7 Oct 2024 15:15:18 +0300 Subject: [PATCH 04/16] Skip some tests on WASI. 
--- Lib/test/test_strptime.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 0d4404cd627a7a..417a2e9b7cad7c 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -484,7 +484,8 @@ def test_date_locale(self): # NB: Dates before 1969 do not work on a number of locales, including C. @unittest.skipIf( - support.is_emscripten, "musl libc issue on Emscripten, bpo-46390" + support.is_emscripten or support.is_wasi, + "musl libc issue on Emscripten, bpo-46390" ) @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') def test_date_locale2(self): From e5be6fa130989a5f33b14b96035de506622a1ae4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 7 Oct 2024 17:28:06 +0300 Subject: [PATCH 05/16] Add many comments and do not run tests on potentially broken locales. --- Lib/test/test_strptime.py | 40 ++++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 417a2e9b7cad7c..3a63f8ddafc226 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -469,20 +469,41 @@ def test_bad_timezone(self): "time.daylight set to %s and passing in %s" % (time.tzname, tz_value, time.daylight, tz_name)) - # NB: Does not roundtrip on some locales like hif_FJ. - @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') + # NB: Does not roundtrip in some locales due to the ambiguity of + # the date and time representation (bugs in locales?): + # * Seconds are not included: bem_ZM, bokmal, ff_SN, nb_NO, nn_NO, + # no_NO, norwegian, nynorsk. + # * Hours are in 12-hour notation without AM/PM indication: hy_AM, + # id_ID, ms_MY. + # * Year is not included: ha_NG. + # + # BUG: Generates invalid regexp for a number of Arabic locales, + # br_FR, csb_PL, lo_LA, thai, th_TH. 
+ # BUG: Generates regexp that does not match the current date and time + # for a number of Arabic locales, fa_IR, gez_ER, gez_ET, lzh_TW, + # my_MM, or_IN, shn_MM, yo_NG. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL') def test_date_time_locale(self): # Test %c directive self.roundtrip('%c', slice(0, 6)) + + # NB: Dates before 1969 do not roundtrip on some locales: + # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL') + def test_date_time_locale2(self): + # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) - # NB: Dates before 1969 do not work on a number of locales, including C. + # BUG: Generates invalid regexp for lo_LA, thai, th_TH. + # BUG: Generates regexp that does not match the current date + # for a number of Arabic locales, az_IR, eu_ES, eu_FR, fa_IR, lzh_TW, + # my_MM, or_IN, shn_MM. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') def test_date_locale(self): # Test %x directive self.roundtrip('%x', slice(0, 3)) - # NB: Dates before 1969 do not work on a number of locales, including C. + # NB: Dates before 1969 do not roundtrip on many locales, including C. @unittest.skipIf( support.is_emscripten or support.is_wasi, "musl libc issue on Emscripten, bpo-46390" @@ -492,7 +513,16 @@ def test_date_locale2(self): # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) - # NB: Does not distinguish AM/PM time on a number of locales. + # NB: Does not roundtrip in some locales due to the ambiguity of + # the time representation (bugs in locales?): + # * Seconds are not included: bokmal, ff_SN, nb_NO, nn_NO, no_NO, + # norwegian, nynorsk. + # * Hours are in 12-hour notation without AM/PM indication: hy_AM, + # ms_MY, sm_WS. 
+ # BUG: Generates regexp that does not match the current time for + # aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET, + # lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET, + # so_SO, ti_ER, ti_ET, tig_ER, wal_ET. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') def test_time_locale(self): # Test %X directive From a674f29d76f371589d2cf4bd632fa6001d6aa994 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 7 Oct 2024 19:28:12 +0300 Subject: [PATCH 06/16] Fix also for the Basque locale. --- Lib/_strptime.py | 9 +++++---- Lib/test/test_strptime.py | 16 ++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index b26ff97121e85d..0ba5c0fe2200d2 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -131,7 +131,6 @@ def __calc_date_time(self): # month names are equal. See doc of __find_month_format for more # details. # - month_format = self.__find_month_format() replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone for tz in tz_values]) for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): @@ -144,7 +143,9 @@ def __calc_date_time(self): if old: current_format = current_format.replace(old, new) for month_str in (self.f_month[3], self.a_month[3]): - current_format = current_format.replace(month_str, month_format) + if month_str in current_format: + month_format = self.__find_month_format(directive) + current_format = current_format.replace(month_str, month_format) # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. @@ -158,7 +159,7 @@ def __calc_date_time(self): self.LC_date = date_time[1] self.LC_time = date_time[2] - def __find_month_format(self): + def __find_month_format(self, directive): """Find the month format appropriate for the current locale. 
In some locales (for example French and Hebrew), the default month @@ -171,7 +172,7 @@ def __find_month_format(self): for m in range(1, 13): if self.f_month[m] != self.a_month[m]: time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) - datetime = time.strftime('%c', time_tuple).lower() + datetime = time.strftime(directive, time_tuple).lower() if datetime.find(self.f_month[m]) >= 0: return '%B' elif datetime.find(self.a_month[m]) >= 0: diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 3a63f8ddafc226..fe14570c32ba82 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -476,13 +476,15 @@ def test_bad_timezone(self): # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # id_ID, ms_MY. # * Year is not included: ha_NG. + # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # # BUG: Generates invalid regexp for a number of Arabic locales, - # br_FR, csb_PL, lo_LA, thai, th_TH. + # br_FR, csb_PL. # BUG: Generates regexp that does not match the current date and time # for a number of Arabic locales, fa_IR, gez_ER, gez_ET, lzh_TW, # my_MM, or_IN, shn_MM, yo_NG. - @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'eu_ES') def test_date_time_locale(self): # Test %c directive self.roundtrip('%c', slice(0, 6)) @@ -494,11 +496,13 @@ def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) - # BUG: Generates invalid regexp for lo_LA, thai, th_TH. + # NB: Does not roundtrip because use non-Gregorian calendar: + # lo_LA, thai, th_TH. # BUG: Generates regexp that does not match the current date - # for a number of Arabic locales, az_IR, eu_ES, eu_FR, fa_IR, lzh_TW, - # my_MM, or_IN, shn_MM. 
- @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', '') + # for a number of Arabic locales, az_IR, fa_IR, lzh_TW, my_MM, + # or_IN, shn_MM. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'eu_ES', '') def test_date_locale(self): # Test %x directive self.roundtrip('%x', slice(0, 3)) From 34320d633b21dfd4cb72e5f179705d7d66193789 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 7 Oct 2024 22:50:06 +0300 Subject: [PATCH 07/16] Fix for Arabic. --- Lib/_strptime.py | 59 +++++++++++++++++++++++++++++---------- Lib/test/test_strptime.py | 13 ++++----- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 0ba5c0fe2200d2..05faccd8d45230 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -28,6 +28,18 @@ def _getlang(): # Figure out what the current language is set to. return locale.getlocale(locale.LC_TIME) +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + class LocaleTime(object): """Stores and handles locale-specific information related to time. 
@@ -119,6 +131,7 @@ def __calc_date_time(self): date_time[1] = time.strftime("%x", time_tuple).lower() date_time[2] = time.strftime("%X", time_tuple).lower() replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), + ('', ''), # month name, if used (self.a_weekday[2], '%a'), (self.am_pm[1], '%p'), ('1999', '%Y'), ('99', '%y'), ('22', '%H'), @@ -135,6 +148,10 @@ def __calc_date_time(self): for tz in tz_values]) for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): current_format = date_time[offset] + assert replacement_pairs[2] == ('', '') + month_format = self.__find_month_format(directive) + if month_format: + replacement_pairs[2] = (month_format[0][3], month_format[1]) for old, new in replacement_pairs: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's @@ -142,10 +159,7 @@ def __calc_date_time(self): # strings (e.g., MacOS 9 having timezone as ('','')). if old: current_format = current_format.replace(old, new) - for month_str in (self.f_month[3], self.a_month[3]): - if month_str in current_format: - month_format = self.__find_month_format(directive) - current_format = current_format.replace(month_str, month_format) + replacement_pairs[2] = ('', '') # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. @@ -164,20 +178,35 @@ def __find_month_format(self, directive): In some locales (for example French and Hebrew), the default month used in __calc_date_time has the same name in full and abbreviated - form. Thus, cycle months of the year until a month is found where - these representations differ, and check the datetime string created - by strftime against this month, to make sure we select the correct - format specifier. + form. Also, the month name can by accident match other part of the + representation: the day of the week name (for example in Morisyen) + or the month number (for example in Japanese). 
Thus, cycle months + of the year and find all positions that match the month name for + each month. If no common positions are found, the representation + does not use the month name. """ + full_indices = abbr_indices = None for m in range(1, 13): + time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_month[m])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices if self.f_month[m] != self.a_month[m]: - time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) - datetime = time.strftime(directive, time_tuple).lower() - if datetime.find(self.f_month[m]) >= 0: - return '%B' - elif datetime.find(self.a_month[m]) >= 0: - return '%b' - return '%B' + indices = set(_findall(datetime, self.a_month[m])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None + if full_indices: + return self.f_month, '%B' + if abbr_indices: + return self.a_month, '%b' + return None def __calc_timezone(self): # Set self.timezone by using time.tzname. diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index fe14570c32ba82..c9225aaf94599e 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -478,13 +478,11 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # - # BUG: Generates invalid regexp for a number of Arabic locales, - # br_FR, csb_PL. + # BUG: Generates invalid regexp for br_FR, csb_PL. # BUG: Generates regexp that does not match the current date and time - # for a number of Arabic locales, fa_IR, gez_ER, gez_ET, lzh_TW, - # my_MM, or_IN, shn_MM, yo_NG. + # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. 
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'he_IL', 'eu_ES') + 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU') def test_date_time_locale(self): # Test %c directive self.roundtrip('%c', slice(0, 6)) @@ -499,10 +497,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. # BUG: Generates regexp that does not match the current date - # for a number of Arabic locales, az_IR, fa_IR, lzh_TW, my_MM, - # or_IN, shn_MM. + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'he_IL', 'eu_ES', '') + 'he_IL', 'eu_ES', 'ar_AE') def test_date_locale(self): # Test %x directive self.roundtrip('%x', slice(0, 3)) From 8d8b01d3d9da79c41dabc33761c91f1a3e7b92a2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Oct 2024 12:54:07 +0300 Subject: [PATCH 08/16] Update NEWS file. --- .../Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst index be828b004bb20a..c179e78d7c5389 100644 --- a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -1,2 +1,5 @@ -Fix :func:`time.strptime` for ``%c`` format in locales with a short March -month name, such as French or Hebrew. +Fix :func:`time.strptime` for ``%c`` and ``%x`` format in many locales: +Arabic, Bislama, Chuvash, Estonian, French, Irish, Gurajati, Manx Gaelic, +Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, Japanese, Kannada, Korean, +Marathi, Malay, Norwegian, Nynorsk, Punjabi, Rajasthani, Tok Pisin, +Yue Chinese, Yau/Nungon and Chinese. 
From f9ff92afffde5ae6af62840e0244fdc5b9705c57 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 8 Oct 2024 22:51:44 +0300 Subject: [PATCH 09/16] Refactoring. --- Lib/_strptime.py | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 05faccd8d45230..d0a64fa88cd1ba 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -126,49 +126,48 @@ def __calc_date_time(self): # values within the format string is very important; it eliminates # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - ('', ''), # month name, if used - (self.a_weekday[2], '%a'), - (self.am_pm[1], '%p'), + time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) + replacement_pairs = [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I')] - # The month format is treated specially because of a possible - # ambiguity in some locales where the full and abbreviated - # month names are equal. See doc of __find_month_format for more - # details. 
- # - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): - current_format = date_time[offset] - assert replacement_pairs[2] == ('', '') + date_time = [] + for directive in ('%c', '%x', '%X'): + current_format = time.strftime(directive, time_tuple).lower() + current_format = current_format.replace('%', '%%') + current_format = current_format.replace(self.f_weekday[2], '%A') + # The month format is treated specially because of a possible + # ambiguity in some locales where the full and abbreviated + # month names are equal. See doc of __find_month_format for more + # details. month_format = self.__find_month_format(directive) if month_format: - replacement_pairs[2] = (month_format[0][3], month_format[1]) - for old, new in replacement_pairs: + current_format = current_format.replace(month_format[0][3], + month_format[1]) + current_format = current_format.replace(self.a_weekday[2], '%a') + if self.am_pm[1]: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's # lack of AM/PM info) or a platform returning a tuple of empty # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) - replacement_pairs[2] = ('', '') + current_format = current_format.replace(self.am_pm[1], '%p') + for tz_values in self.timezone: + for tz in tz_values: + if tz: + current_format = current_format.replace(tz, "%Z") + for old, new in replacement_pairs: + current_format = current_format.replace(old, new) # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. 
- time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) - if '00' in time.strftime(directive, time_tuple): + if '00' in time.strftime(directive, time_tuple2): U_W = '%W' else: U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) + current_format = current_format.replace('11', U_W) + date_time.append(current_format) self.LC_date_time = date_time[0] self.LC_date = date_time[1] self.LC_time = date_time[2] From 71be06d55577d0f4844777bab421d9c9f09a0f93 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Oct 2024 11:24:32 +0300 Subject: [PATCH 10/16] Update test comments. --- Lib/test/test_strptime.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index bfb276a06293fe..164dca8daf3b50 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -478,9 +478,9 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # - # BUG: Generates invalid regexp for br_FR, csb_PL, Arabic. + # BUG: Generates invalid regexp for br_FR, csb_PL. # BUG: Generates regexp that does not match the current date and time - # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. + # for az_IR, brx_IN, fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU') def test_date_time_locale(self): @@ -502,7 +502,8 @@ def test_date_time_locale(self): # NB: Dates before 1969 do not roundtrip on some locales: # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. 
- @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'ar_AE', 'mfe_MU') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) @@ -531,7 +532,8 @@ def test_date_locale(self): support.is_emscripten or support.is_wasi, "musl libc issue on Emscripten, bpo-46390" ) - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'eu_ES', 'ar_AE') def test_date_locale2(self): # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) From dcf8045598089160f11361d3fab90d98651e6e70 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Oct 2024 11:29:31 +0300 Subject: [PATCH 11/16] Polishing. --- Lib/_strptime.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index d0a64fa88cd1ba..98eefd0b847451 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -142,10 +142,9 @@ def __calc_date_time(self): # ambiguity in some locales where the full and abbreviated # month names are equal. See doc of __find_month_format for more # details. 
- month_format = self.__find_month_format(directive) - if month_format: - current_format = current_format.replace(month_format[0][3], - month_format[1]) + lst, fmt = self.__find_month_format(directive) + if lst: + current_format = current_format.replace(lst[3], fmt) current_format = current_format.replace(self.a_weekday[2], '%a') if self.am_pm[1]: # Must deal with possible lack of locale info @@ -200,12 +199,12 @@ def __find_month_format(self, directive): else: abbr_indices &= indices if not full_indices and not abbr_indices: - return None + return None, None if full_indices: return self.f_month, '%B' if abbr_indices: return self.a_month, '%b' - return None + return None, None def __calc_timezone(self): # Set self.timezone by using time.tzname. From 39e16b5c2acc8499a0c03bf2124f9dd06fb16e12 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Oct 2024 12:14:44 +0300 Subject: [PATCH 12/16] Fix also Breton, Kashubian and Yoruba. --- Lib/_strptime.py | 50 +++++++++++++++---- Lib/test/test_strptime.py | 9 ++-- ...4-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 8 +-- 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 98eefd0b847451..09f21dd2d47265 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -114,7 +114,8 @@ def __calc_am_pm(self): am_pm = [] for hour in (1, 22): time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) + # br_FR has AM/PM info (' ',' '). 
+ am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm def __calc_date_time(self): @@ -137,15 +138,16 @@ def __calc_date_time(self): for directive in ('%c', '%x', '%X'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') - current_format = current_format.replace(self.f_weekday[2], '%A') - # The month format is treated specially because of a possible - # ambiguity in some locales where the full and abbreviated - # month names are equal. See doc of __find_month_format for more - # details. + # The month and the day of the week formats are treated specially + # because of a possible ambiguity in some locales where the full + # and abbreviated names are equal or names of different types + # are equal. See doc of __find_month_format for more details. + lst, fmt = self.__find_weekday_format(directive) + if lst: + current_format = current_format.replace(lst[2], fmt, 1) lst, fmt = self.__find_month_format(directive) if lst: - current_format = current_format.replace(lst[3], fmt) - current_format = current_format.replace(self.a_weekday[2], '%a') + current_format = current_format.replace(lst[3], fmt, 1) if self.am_pm[1]: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's @@ -206,6 +208,34 @@ def __find_month_format(self, directive): return self.a_month, '%b' return None, None + def __find_weekday_format(self, directive): + """Find the day of the week format appropriate for the current locale. + + Similar to __find_month_format(). 
+ """ + full_indices = abbr_indices = None + for wd in range(7): + time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, wd, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_weekday[wd])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + if self.f_weekday[wd] != self.a_weekday[wd]: + indices = set(_findall(datetime, self.a_weekday[wd])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_weekday, '%A' + if abbr_indices: + return self.a_weekday, '%a' + return None, None + def __calc_timezone(self): # Set self.timezone by using time.tzname. # Do not worry about possibility of time.tzname[0] == time.tzname[1] @@ -405,8 +435,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: - raise ValueError("time data %r does not match format %r" % - (data_string, format)) + raise ValueError("time data %r does not match format %r :: /%s/" % + (data_string, format, format_regex.pattern)) if len(data_string) != found.end(): raise ValueError("unconverted data remains: %s" % data_string[found.end():]) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 164dca8daf3b50..dd472a37e111c8 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -478,11 +478,11 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # - # BUG: Generates invalid regexp for br_FR, csb_PL. # BUG: Generates regexp that does not match the current date and time - # for az_IR, brx_IN, fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. + # for az_IR, brx_IN, fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM. 
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU') + 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', + 'csb_PL', 'br_FR') def test_date_time_locale(self): # Test %c directive now = time.time() @@ -503,7 +503,8 @@ def test_date_time_locale(self): # NB: Dates before 1969 do not roundtrip on some locales: # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'he_IL', 'ar_AE', 'mfe_MU') + 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', + 'csb_PL', 'br_FR') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst index c179e78d7c5389..db8e84842aaac4 100644 --- a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -1,5 +1,5 @@ Fix :func:`time.strptime` for ``%c`` and ``%x`` format in many locales: -Arabic, Bislama, Chuvash, Estonian, French, Irish, Gurajati, Manx Gaelic, -Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, Japanese, Kannada, Korean, -Marathi, Malay, Norwegian, Nynorsk, Punjabi, Rajasthani, Tok Pisin, -Yue Chinese, Yau/Nungon and Chinese. +Arabic, Bislama, Breton, Kashubian, Chuvash, Estonian, French, Irish, +Gurajati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, +Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, +Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. From 391296b071d06dc2f54b6c509d782939b47e0390 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Oct 2024 13:02:21 +0300 Subject: [PATCH 13/16] Fix also Ge'ez. 
--- Lib/_strptime.py | 2 +- Lib/test/test_strptime.py | 6 +++--- .../Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 09f21dd2d47265..6c05d8f977ae51 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -273,7 +273,7 @@ def __init__(self, locale_time=None): 'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P<f>[0-9]{1,6})", 'H': r"(?P<H>2[0-3]|[0-1]\d|\d)", - 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])", + 'I': r"(?P<I>1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P<G>\d\d\d\d)", 'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])", diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index dd472a37e111c8..d1641f3acfc88a 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -479,10 +479,10 @@ def test_bad_timezone(self): # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # # BUG: Generates regexp that does not match the current date and time - # for az_IR, brx_IN, fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM. + # for az_IR, brx_IN, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR') + 'csb_PL', 'br_FR', 'gez_ET') def test_date_time_locale(self): # Test %c directive now = time.time() @@ -504,7 +504,7 @@ def test_date_time_locale(self): # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR.
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR') + 'csb_PL', 'br_FR', 'gez_ET') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst index db8e84842aaac4..35805a2e8cbd82 100644 --- a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -1,5 +1,5 @@ Fix :func:`time.strptime` for ``%c`` and ``%x`` format in many locales: Arabic, Bislama, Breton, Kashubian, Chuvash, Estonian, French, Irish, -Gurajati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, +Ge'ez, Gurajati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. From ef8c18e7d3e7c166676d9d0320cd4fb2e8f6035b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Oct 2024 13:26:01 +0300 Subject: [PATCH 14/16] Fix also Bodo. 
--- Lib/_strptime.py | 3 +-- Lib/test/test_strptime.py | 6 +++--- .../Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 6c05d8f977ae51..89adc174e5ad30 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -194,8 +194,7 @@ def __find_month_format(self, directive): full_indices = indices else: full_indices &= indices - if self.f_month[m] != self.a_month[m]: - indices = set(_findall(datetime, self.a_month[m])) + indices = set(_findall(datetime, self.a_month[m])) if abbr_indices is None: abbr_indices = indices else: diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index d1641f3acfc88a..06cb93a26ce17a 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -479,10 +479,10 @@ def test_bad_timezone(self): # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # # BUG: Generates regexp that does not match the current date and time - # for az_IR, brx_IN, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale(self): # Test %c directive now = time.time() @@ -504,7 +504,7 @@ def test_date_time_locale(self): # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. 
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst index 35805a2e8cbd82..6895cffcf545fd 100644 --- a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -1,5 +1,5 @@ -Fix :func:`time.strptime` for ``%c`` and ``%x`` format in many locales: -Arabic, Bislama, Breton, Kashubian, Chuvash, Estonian, French, Irish, +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats in many locales: +Arabic, Bislama, Breton, Bodo, Kashubian, Chuvash, Estonian, French, Irish, Ge'ez, Gurajati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. From f64b374f4fb555c61ae777e83526710517e6f257 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 17:50:15 +0300 Subject: [PATCH 15/16] Fix testing br_FR on old glibc. 
--- Lib/test/test_strptime.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 06cb93a26ce17a..41f7d09b4bf883 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -5,6 +5,7 @@ import locale import re import os +import platform import sys from test import support from test.support import warnings_helper @@ -13,6 +14,13 @@ import _strptime +libc_ver = platform.libc_ver() +if libc_ver[0] == 'glibc': + glibc_ver = tuple(map(int, libc_ver[1].split('.'))) +else: + glibc_ver = None + + class getlang_Tests(unittest.TestCase): """Test _getlang""" def test_basic(self): @@ -482,9 +490,12 @@ def test_bad_timezone(self): # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', 'id_ID') def test_date_time_locale(self): # Test %c directive + loc = locale.getlocale(locale.LC_TIME)[0] + if glibc_ver and glibc_ver < (2, 31) and loc == 'br_FR': + self.skipTest('%c in locale br_FR does not include time') now = time.time() self.roundtrip('%c', slice(0, 6), time.localtime(now)) # 1 hour 20 minutes 30 seconds ago From 408bd347467155055dfcf8bccdd3fab386664d8b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 20:20:21 +0300 Subject: [PATCH 16/16] Remove added by mistake test for id_ID. --- Lib/test/test_strptime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 41f7d09b4bf883..79f48dfe44abde 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -490,7 +490,7 @@ def test_bad_timezone(self): # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. 
@run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', 'id_ID') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0]