-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
[#38390] Bug fix for Datetime64Formatter with values of ndim > 1 #38391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bb83864
460f33c
39171e9
3ba0925
aac5f1b
14c64cd
12dd233
ec0aa15
6da6c68
da0f037
4140453
1563c4d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1514,18 +1514,25 @@ def __init__( | |
|
||
def _format_strings(self) -> List[str]: | ||
""" we by definition have DO NOT have a TZ """ | ||
values = self.values | ||
|
||
if not isinstance(values, DatetimeIndex): | ||
values = DatetimeIndex(values) | ||
values = np.asarray(self.values) | ||
flat_values = values.ravel() if values.ndim > 1 else values | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I think that would work. The only thing is that if |
||
flat_values = DatetimeArray(flat_values) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DatetimeArray can handle 2D values, so some of the ravel/ndim checks might not be needed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I noticed it's able to handle 2D values, but the extension array that I was fixing this for can have higher dimensions, so it would still need to check above that plus when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK no problem |
||
|
||
if self.formatter is not None and callable(self.formatter): | ||
return [self.formatter(x) for x in values] | ||
fmt_values = [self.formatter(x) for x in flat_values] | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we not using just the _format_native_types here? this makes this way more complicated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, which line are you referring to? Did you mean instead of calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the issue is that self.formatter may be user-supplied at some point in the process? |
||
fmt_values = flat_values._format_native_types( | ||
na_rep=self.nat_rep, date_format=self.date_format | ||
) | ||
|
||
fmt_values = values._data._format_native_types( | ||
na_rep=self.nat_rep, date_format=self.date_format | ||
) | ||
return fmt_values.tolist() | ||
if values.ndim > 1: | ||
fmt_values = np.asarray(fmt_values).reshape(values.shape) | ||
nested_formatter = GenericArrayFormatter(fmt_values) | ||
fmt_values = nested_formatter.get_result() | ||
elif isinstance(fmt_values, np.ndarray): | ||
fmt_values = fmt_values.tolist() | ||
|
||
return fmt_values | ||
|
||
|
||
class ExtensionArrayFormatter(GenericArrayFormatter): | ||
|
@@ -1700,11 +1707,19 @@ class Datetime64TZFormatter(Datetime64Formatter): | |
def _format_strings(self) -> List[str]: | ||
""" we by definition have a TZ """ | ||
values = self.values.astype(object) | ||
ido = is_dates_only(values) | ||
flat_values = values.ravel() if values.ndim > 1 else values | ||
|
||
ido = is_dates_only(flat_values) | ||
formatter = self.formatter or get_format_datetime64( | ||
ido, date_format=self.date_format | ||
) | ||
fmt_values = [formatter(x) for x in values] | ||
|
||
fmt_values = [formatter(x) for x in flat_values] | ||
|
||
if values.ndim > 1: | ||
fmt_values = np.asarray(fmt_values).reshape(values.shape) | ||
nested_formatter = GenericArrayFormatter(fmt_values) | ||
fmt_values = nested_formatter.get_result() | ||
|
||
return fmt_values | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3106,6 +3106,94 @@ def format_func(x): | |
result = formatter.get_result() | ||
assert result == ["10:10", "12:12"] | ||
|
||
def test_datetime64formatter_2d_array(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need any tz-aware cases? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, good idea. It's likely to fail the same way so I'll add that too. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
x = pd.date_range("2018-01-01", periods=10, freq="H").to_numpy() | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((5, 2))) | ||
result = formatter.get_result() | ||
assert len(result) == 5 | ||
assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00]" | ||
assert result[4].strip() == "[2018-01-01 08:00:00, 2018-01-01 09:00:00]" | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((2, 5))) | ||
result = formatter.get_result() | ||
assert len(result) == 2 | ||
assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00, 201..." | ||
assert result[1].strip() == "[2018-01-01 05:00:00, 2018-01-01 06:00:00, 201..." | ||
|
||
def test_datetime64formatter_3d_array(self): | ||
x = pd.date_range("2018-01-01", periods=10, freq="H").to_numpy() | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((10, 1, 1))) | ||
result = formatter.get_result() | ||
assert len(result) == 10 | ||
assert result[0].strip() == "[[2018-01-01 00:00:00]]" | ||
assert result[9].strip() == "[[2018-01-01 09:00:00]]" | ||
|
||
def test_datetime64formatter_2d_array_format_func(self): | ||
x = pd.date_range("2018-01-01", periods=24, freq="H").to_numpy() | ||
|
||
def format_func(t): | ||
return t.strftime("%H-%m") | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((4, 2, 3)), formatter=format_func) | ||
result = formatter.get_result() | ||
assert len(result) == 4 | ||
assert result[0].strip() == "[[00-01, 01-01, 02-01], [03-01, 04-01, 05-01]]" | ||
assert result[3].strip() == "[[18-01, 19-01, 20-01], [21-01, 22-01, 23-01]]" | ||
|
||
|
||
class TestDatetime64TZFormatter: | ||
def test_mixed(self): | ||
utc = dateutil.tz.tzutc() | ||
x = Series( | ||
[ | ||
datetime(2013, 1, 1, tzinfo=utc), | ||
datetime(2013, 1, 1, 12, tzinfo=utc), | ||
pd.NaT, | ||
] | ||
) | ||
result = fmt.Datetime64TZFormatter(x).get_result() | ||
assert len(result) == 3 | ||
assert result[0].strip() == "2013-01-01 00:00:00+00:00" | ||
assert result[1].strip() == "2013-01-01 12:00:00+00:00" | ||
assert result[2].strip() == "NaT" | ||
|
||
def test_datetime64formatter_1d_array(self): | ||
x = pd.date_range("2018-01-01", periods=3, freq="H", tz="US/Pacific").to_numpy() | ||
formatter = fmt.Datetime64TZFormatter(x) | ||
result = formatter.get_result() | ||
assert len(result) == 3 | ||
assert result[0].strip() == "2018-01-01 00:00:00-08:00" | ||
assert result[1].strip() == "2018-01-01 01:00:00-08:00" | ||
assert result[2].strip() == "2018-01-01 02:00:00-08:00" | ||
|
||
def test_datetime64formatter_2d_array(self): | ||
x = pd.date_range( | ||
"2018-01-01", periods=10, freq="H", tz="US/Pacific" | ||
).to_numpy() | ||
formatter = fmt.Datetime64TZFormatter(x.reshape((5, 2))) | ||
result = formatter.get_result() | ||
assert len(result) == 5 | ||
assert result[0].strip() == "[2018-01-01 00:00:00-08:00, 2018-01-01 01:00:0..." | ||
assert result[4].strip() == "[2018-01-01 08:00:00-08:00, 2018-01-01 09:00:0..." | ||
|
||
def test_datetime64formatter_2d_array_format_func(self): | ||
x = pd.date_range( | ||
"2018-01-01", periods=16, freq="H", tz="US/Pacific" | ||
).to_numpy() | ||
|
||
def format_func(t): | ||
return t.strftime("%H-%m %Z") | ||
|
||
formatter = fmt.Datetime64TZFormatter( | ||
x.reshape((4, 2, 2)), formatter=format_func | ||
) | ||
result = formatter.get_result() | ||
assert len(result) == 4 | ||
assert result[0].strip() == "[[00-01 PST, 01-01 PST], [02-01 PST, 03-01 PST]]" | ||
assert result[3].strip() == "[[12-01 PST, 13-01 PST], [14-01 PST, 15-01 PST]]" | ||
|
||
|
||
class TestNaTFormatting: | ||
def test_repr(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is not user facing (as its internal), is there anything that is?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The bug was found when calling
repr()
on anExtensionType
, I will put this in terms of that instead.