Skip to content

BUG: Styler.apply consistently manages Series return objects aligning labels. #42014

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
770c588
edit signature of Styler.apply for more consistent application with S…
attack68 Jun 15, 2021
c67092a
add tests
attack68 Jun 15, 2021
d63073c
allow DataFrame returns that do not have same size but valid labels
attack68 Jun 15, 2021
53bdc38
allow DataFrame returns that do not have same size but valid labels
attack68 Jun 15, 2021
b3b6f0a
more descript error msgs
attack68 Jun 15, 2021
b6fc1b0
more descript error msgs
attack68 Jun 15, 2021
681c0c8
whats new
attack68 Jun 15, 2021
c3c7a40
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Jun 20, 2021
77557b5
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Jun 22, 2021
e5fc3b3
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Jun 29, 2021
5ddb99b
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Jul 30, 2021
6c2a194
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 6, 2021
cd4614e
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 11, 2021
1095b21
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 12, 2021
9f6aca5
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 15, 2021
ddbcd6b
issue number (jreback)
attack68 Aug 15, 2021
fc55b5e
example (jreback)
attack68 Aug 15, 2021
a5c0fdb
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 17, 2021
7e72853
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 18, 2021
9599379
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 19, 2021
65411b7
Merge remote-tracking branch 'upstream/master' into styler_consistent…
attack68 Aug 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,8 @@ Styler
- Minor bug in :class:`.Styler` where the ``uuid`` at initialization maintained a floating underscore (:issue:`43037`)
- Bug in :meth:`.Styler.to_html` where the ``Styler`` object was updated if the ``to_html`` method was called with some args (:issue:`43034`)
- Bug in :meth:`.Styler.copy` where ``uuid`` was not previously copied (:issue:`40675`)
- Bug in :meth:`.Styler.apply` where functions which returned Series objects were not correctly aligned to the data by their index labels (:issue:`13657`, :issue:`42014`)
-

Other
^^^^^
Expand Down
58 changes: 39 additions & 19 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@ def _update_ctx(self, attrs: DataFrame) -> None:

for cn in attrs.columns:
for rn, c in attrs[[cn]].itertuples():
if not c:
if not c or pd.isna(c):
continue
css_list = maybe_convert_css_to_tuples(c)
i, j = self.index.get_loc(rn), self.columns.get_loc(cn)
Expand Down Expand Up @@ -1148,9 +1148,10 @@ def _apply(
subset = slice(None) if subset is None else subset
subset = non_reducing_slice(subset)
data = self.data.loc[subset]
if axis is not None:
result = data.apply(func, axis=axis, result_type="expand", **kwargs)
result.columns = data.columns
if axis in [0, "index"]:
result = data.apply(func, axis=0, **kwargs)
elif axis in [1, "columns"]:
result = data.T.apply(func, axis=0, **kwargs).T # see GH 42005
else:
result = func(data, **kwargs)
if not isinstance(result, DataFrame):
Expand All @@ -1166,19 +1167,28 @@ def _apply(
f"Expected shape: {data.shape}"
)
result = DataFrame(result, index=data.index, columns=data.columns)
elif not (
result.index.equals(data.index) and result.columns.equals(data.columns)
):
raise ValueError(
f"Result of {repr(func)} must have identical "
f"index and columns as the input"
)

if result.shape != data.shape:
if isinstance(result, Series):
raise ValueError(
f"Function {repr(func)} returned the wrong shape.\n"
f"Result has shape: {result.shape}\n"
f"Expected shape: {data.shape}"
f"Function {repr(func)} resulted in the apply method collapsing to a "
f"Series.\nUsually, this is the result of the function returning a "
f"single value, instead of list-like."
)
msg = (
f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is "
f"the result of the function returning a "
f"{'Series' if axis is not None else 'DataFrame'} which contains invalid "
f"labels, or returning an incorrectly shaped, list-like object which "
f"cannot be mapped to labels, possibly due to applying the function along "
f"the wrong axis.\n"
f"Result {{0}} has shape: {{1}}\n"
f"Expected {{0}} shape: {{2}}"
)
if not all(result.index.isin(data.index)):
raise ValueError(msg.format("index", result.index.shape, data.index.shape))
if not all(result.columns.isin(data.columns)):
raise ValueError(
msg.format("columns", result.columns.shape, data.columns.shape)
)
self._update_ctx(result)
return self
Expand All @@ -1198,14 +1208,17 @@ def apply(
Parameters
----------
func : function
``func`` should take a Series if ``axis`` in [0,1] and return an object
of same length, also with identical index if the object is a Series.
``func`` should take a Series if ``axis`` in [0,1] and return a list-like
object of same length, or a Series, not necessarily of same length, with
valid index labels considering ``subset``.
``func`` should take a DataFrame if ``axis`` is ``None`` and return either
an ndarray with the same shape or a DataFrame with identical columns and
index.
an ndarray with the same shape or a DataFrame, not necessarily of the same
shape, with valid index and columns labels considering ``subset``.

.. versionchanged:: 1.3.0

.. versionchanged:: 1.4.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update the examples to show a Series return matching labels/index (e.g. not the same length)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added


axis : {0 or 'index', 1 or 'columns', None}, default 0
Apply to each column (``axis=0`` or ``'index'``), to each row
(``axis=1`` or ``'columns'``), or to the entire DataFrame at once
Expand Down Expand Up @@ -1260,6 +1273,13 @@ def apply(
>>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A"))
... # doctest: +SKIP

Using a function which returns a Series / DataFrame of unequal length but
containing valid index labels

>>> df = pd.DataFrame([[1, 2], [3, 4], [4, 6]], index=["A1", "A2", "Total"])
>>> total_style = pd.Series("font-weight: bold;", index=["Total"])
>>> df.style.apply(lambda s: total_style) # doctest: +SKIP

See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for
more details.
"""
Expand Down
65 changes: 52 additions & 13 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,40 @@ def test_apply_axis(self):
result._compute()
assert result.ctx == expected

@pytest.mark.parametrize("axis", [0, 1])
def test_apply_series_return(self, axis):
# GH 42014
df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"])

# test Series return shorter than df.index / df.columns but with valid labels
func = lambda s: pd.Series(["color: red;"], index=["Y"])
result = df.style.apply(func, axis=axis)._compute().ctx
assert result[(1, 1)] == [("color", "red")]
assert result[(1 - axis, axis)] == [("color", "red")]

# test Series return where labels align but different order
func = lambda s: pd.Series(["color: red;", "color: blue;"], index=["Y", "X"])
result = df.style.apply(func, axis=axis)._compute().ctx
assert result[(0, 0)] == [("color", "blue")]
assert result[(1, 1)] == [("color", "red")]
assert result[(1 - axis, axis)] == [("color", "red")]
assert result[(axis, 1 - axis)] == [("color", "blue")]

@pytest.mark.parametrize("index", [False, True])
@pytest.mark.parametrize("columns", [False, True])
def test_apply_dataframe_return(self, index, columns):
# GH 42014
df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"])
idxs = ["X", "Y"] if index else ["Y"]
cols = ["X", "Y"] if columns else ["Y"]
df_styles = DataFrame("color: red;", index=idxs, columns=cols)
result = df.style.apply(lambda x: df_styles, axis=None)._compute().ctx

assert result[(1, 1)] == [("color", "red")] # (Y,Y) styles always present
assert (result[(0, 1)] == [("color", "red")]) is index # (X,Y) only if index
assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols
assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X)

@pytest.mark.parametrize(
"slice_",
[
Expand Down Expand Up @@ -794,24 +828,28 @@ def test_export(self):
style2.to_html()

def test_bad_apply_shape(self):
df = DataFrame([[1, 2], [3, 4]])
msg = "returned the wrong shape"
with pytest.raises(ValueError, match=msg):
df.style._apply(lambda x: "x", subset=pd.IndexSlice[[0, 1], :])
df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"])

msg = "resulted in the apply method collapsing to a Series."
with pytest.raises(ValueError, match=msg):
df.style._apply(lambda x: [""], subset=pd.IndexSlice[[0, 1], :])
df.style._apply(lambda x: "x")

with pytest.raises(ValueError, match=msg):
msg = "created invalid {} labels"
with pytest.raises(ValueError, match=msg.format("index")):
df.style._apply(lambda x: [""])

with pytest.raises(ValueError, match=msg.format("index")):
df.style._apply(lambda x: ["", "", "", ""])

with pytest.raises(ValueError, match=msg):
df.style._apply(lambda x: ["", "", ""], subset=1)
with pytest.raises(ValueError, match=msg.format("index")):
df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["A", "C"]), axis=0)

msg = "Length mismatch: Expected axis has 3 elements"
with pytest.raises(ValueError, match=msg):
with pytest.raises(ValueError, match=msg.format("columns")):
df.style._apply(lambda x: ["", "", ""], axis=1)

with pytest.raises(ValueError, match=msg.format("columns")):
df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["X", "Z"]), axis=1)

msg = "returned ndarray with wrong shape"
with pytest.raises(ValueError, match=msg):
df.style._apply(lambda x: np.array([[""], [""]]), axis=None)
Expand All @@ -828,12 +866,13 @@ def f(x):
with pytest.raises(TypeError, match=msg):
df.style._apply(f, axis=None)

def test_apply_bad_labels(self):
@pytest.mark.parametrize("axis", ["index", "columns"])
def test_apply_bad_labels(self, axis):
def f(x):
return DataFrame(index=[1, 2], columns=["a", "b"])
return DataFrame(**{axis: ["bad", "labels"]})

df = DataFrame([[1, 2], [3, 4]])
msg = "must have identical index and columns as the input"
msg = f"created invalid {axis} labels."
with pytest.raises(ValueError, match=msg):
df.style._apply(f, axis=None)

Expand Down