-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN/TYP: pandas/io/formats/excel.py #37862
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
8d5d4bb
b066f5f
3484f53
5c2f160
ee884ca
dab9e15
1ff8cb8
ee09d4d
c5e1d9c
7d27a71
2f02811
a63b7d1
9fcfb32
aa48fb6
f4abe27
f468397
4cf8f83
084e953
8a9a40a
2ee661c
c88aff9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
from functools import reduce | ||
import itertools | ||
import re | ||
from typing import Callable, Dict, Mapping, Optional, Sequence, Union | ||
from typing import Callable, Dict, Iterator, Mapping, Optional, Sequence, Sized, Union | ||
import warnings | ||
|
||
import numpy as np | ||
|
@@ -522,16 +522,15 @@ def _format_value(self, val): | |
) | ||
return val | ||
|
||
def _format_header_mi(self): | ||
def _format_header_mi(self) -> Iterator[ExcelCell]: | ||
if self.columns.nlevels > 1: | ||
if not self.index: | ||
raise NotImplementedError( | ||
"Writing to Excel with MultiIndex columns and no " | ||
"index ('index'=False) is not yet implemented." | ||
) | ||
|
||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if not (has_aliases or self.header): | ||
if not (self._has_aliases or self.header): | ||
return | ||
|
||
columns = self.columns | ||
|
@@ -547,28 +546,34 @@ def _format_header_mi(self): | |
|
||
if self.merge_cells: | ||
# Format multi-index as a merged cells. | ||
for lnum in range(len(level_lengths)): | ||
name = columns.names[lnum] | ||
yield ExcelCell(lnum, coloffset, name, self.header_style) | ||
for lnum, name in enumerate(columns.names): | ||
yield ExcelCell( | ||
row=lnum, | ||
col=coloffset, | ||
val=name, | ||
style=self.header_style, | ||
) | ||
|
||
for lnum, (spans, levels, level_codes) in enumerate( | ||
zip(level_lengths, columns.levels, columns.codes) | ||
): | ||
values = levels.take(level_codes) | ||
for i in spans: | ||
kwargs = dict( | ||
row=lnum, | ||
col=coloffset + i + 1, | ||
val=values[i], | ||
style=self.header_style, | ||
) | ||
|
||
if spans[i] > 1: | ||
yield ExcelCell( | ||
lnum, | ||
coloffset + i + 1, | ||
values[i], | ||
self.header_style, | ||
lnum, | ||
coloffset + i + spans[i], | ||
) | ||
else: | ||
yield ExcelCell( | ||
lnum, coloffset + i + 1, values[i], self.header_style | ||
kwargs.update( | ||
{ | ||
"mergestart": lnum, | ||
"mergeend": coloffset + i + spans[i], | ||
} | ||
) | ||
yield ExcelCell(**kwargs) | ||
else: | ||
# Format in legacy format with dots to indicate levels. | ||
for i, values in enumerate(zip(*level_strs)): | ||
|
@@ -577,9 +582,8 @@ def _format_header_mi(self): | |
|
||
self.rowcounter = lnum | ||
|
||
def _format_header_regular(self): | ||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if has_aliases or self.header: | ||
def _format_header_regular(self) -> Iterator[ExcelCell]: | ||
if self._has_aliases or self.header: | ||
coloffset = 0 | ||
|
||
if self.index: | ||
|
@@ -588,17 +592,11 @@ def _format_header_regular(self): | |
coloffset = len(self.df.index[0]) | ||
|
||
colnames = self.columns | ||
if has_aliases: | ||
# pandas\io\formats\excel.py:593: error: Argument 1 to "len" | ||
# has incompatible type "Union[Sequence[Optional[Hashable]], | ||
# bool]"; expected "Sized" [arg-type] | ||
if len(self.header) != len(self.columns): # type: ignore[arg-type] | ||
# pandas\io\formats\excel.py:602: error: Argument 1 to | ||
# "len" has incompatible type | ||
# "Union[Sequence[Optional[Hashable]], bool]"; expected | ||
# "Sized" [arg-type] | ||
if self._has_aliases: | ||
assert isinstance(self.header, Sized) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. glad to see this. for some reason my local mypy diagrees with the CI about whether its L601 or L602 that has the problem There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that the problem was with len(self.header), so it should be line 602, but presumably one line was added on top since then. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. once we add a runtime isinstance, this sort of duplicates the _has_aliases property. since self.header is a sequence or bool, maybe _has_aliases could reflect that instead of the explicit checks for the 4 classes. if we have a guarantee from the is_* call, a cast would be more appropriate than the assert. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the types are correct for the header parameter of ExcelFormatter then this should be fine. also cast is a no-op, so no harm using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, changed the casting as you suggested. |
||
if len(self.header) != len(self.columns): | ||
raise ValueError( | ||
f"Writing {len(self.columns)} cols " # type: ignore[arg-type] | ||
f"Writing {len(self.columns)} cols " | ||
f"but got {len(self.header)} aliases" | ||
) | ||
else: | ||
|
@@ -609,7 +607,7 @@ def _format_header_regular(self): | |
self.rowcounter, colindex + coloffset, colname, self.header_style | ||
) | ||
|
||
def _format_header(self): | ||
def _format_header(self) -> Iterator[ExcelCell]: | ||
if isinstance(self.columns, MultiIndex): | ||
gen = self._format_header_mi() | ||
else: | ||
|
@@ -631,15 +629,14 @@ def _format_header(self): | |
self.rowcounter += 1 | ||
return itertools.chain(gen, gen2) | ||
|
||
def _format_body(self): | ||
def _format_body(self) -> Iterator[ExcelCell]: | ||
if isinstance(self.df.index, MultiIndex): | ||
return self._format_hierarchical_rows() | ||
else: | ||
return self._format_regular_rows() | ||
|
||
def _format_regular_rows(self): | ||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if has_aliases or self.header: | ||
def _format_regular_rows(self) -> Iterator[ExcelCell]: | ||
if self._has_aliases or self.header: | ||
self.rowcounter += 1 | ||
|
||
# output index and index_label? | ||
|
@@ -676,9 +673,8 @@ def _format_regular_rows(self): | |
|
||
yield from self._generate_body(coloffset) | ||
|
||
def _format_hierarchical_rows(self): | ||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if has_aliases or self.header: | ||
def _format_hierarchical_rows(self) -> Iterator[ExcelCell]: | ||
if self._has_aliases or self.header: | ||
self.rowcounter += 1 | ||
|
||
gcolidx = 0 | ||
|
@@ -722,39 +718,42 @@ def _format_hierarchical_rows(self): | |
) | ||
|
||
for i in spans: | ||
kwargs = dict( | ||
row=self.rowcounter + i, | ||
col=gcolidx, | ||
val=values[i], | ||
style=self.header_style, | ||
) | ||
if spans[i] > 1: | ||
yield ExcelCell( | ||
self.rowcounter + i, | ||
gcolidx, | ||
values[i], | ||
self.header_style, | ||
self.rowcounter + i + spans[i] - 1, | ||
gcolidx, | ||
) | ||
else: | ||
yield ExcelCell( | ||
self.rowcounter + i, | ||
gcolidx, | ||
values[i], | ||
self.header_style, | ||
kwargs.update( | ||
{ | ||
"mergestart": self.rowcounter + i + spans[i] - 1, | ||
"mergeend": gcolidx, | ||
} | ||
) | ||
yield ExcelCell(**kwargs) | ||
gcolidx += 1 | ||
|
||
else: | ||
# Format hierarchical rows with non-merged values. | ||
for indexcolvals in zip(*self.df.index): | ||
for idx, indexcolval in enumerate(indexcolvals): | ||
yield ExcelCell( | ||
self.rowcounter + idx, | ||
gcolidx, | ||
indexcolval, | ||
self.header_style, | ||
row=self.rowcounter + idx, | ||
col=gcolidx, | ||
val=indexcolval, | ||
style=self.header_style, | ||
) | ||
gcolidx += 1 | ||
|
||
yield from self._generate_body(gcolidx) | ||
|
||
def _generate_body(self, coloffset: int): | ||
@property | ||
def _has_aliases(self) -> bool: | ||
"""Whether the aliases for column names are present.""" | ||
return bool(isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))) | ||
simonjayhawkins marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def _generate_body(self, coloffset: int) -> Iterator[ExcelCell]: | ||
if self.styler is None: | ||
styles = None | ||
else: | ||
|
@@ -771,7 +770,7 @@ def _generate_body(self, coloffset: int): | |
xlstyle = self.style_converter(";".join(styles[i, colidx])) | ||
yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) | ||
|
||
def get_formatted_cells(self): | ||
def get_formatted_cells(self) -> Iterator[ExcelCell]: | ||
for cell in itertools.chain(self._format_header(), self._format_body()): | ||
cell.val = self._format_value(cell.val) | ||
yield cell | ||
|
Uh oh!
There was an error while loading. Please reload this page.