From 770c588d8fa2c94158170c62616389febf90f91c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 08:03:35 +0200 Subject: [PATCH 1/9] edit signature of Styler.apply for more consistent application with Series return objects --- pandas/io/formats/style.py | 33 ++++++++++++++++----- pandas/tests/io/formats/style/test_style.py | 24 ++++++++------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 93c3843b36846..ffb9c25808da2 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -909,7 +909,7 @@ def _update_ctx(self, attrs: DataFrame) -> None: for cn in attrs.columns: for rn, c in attrs[[cn]].itertuples(): - if not c: + if not c or pd.isna(c): continue css_list = maybe_convert_css_to_tuples(c) i, j = self.index.get_loc(rn), self.columns.get_loc(cn) @@ -980,9 +980,10 @@ def _apply( subset = slice(None) if subset is None else subset subset = non_reducing_slice(subset) data = self.data.loc[subset] - if axis is not None: - result = data.apply(func, axis=axis, result_type="expand", **kwargs) - result.columns = data.columns + if axis in [0, "index"]: + result = data.apply(func, axis=0, **kwargs) + elif axis in [1, "columns"]: + result = data.T.apply(func, axis=0, **kwargs).T # see GH 42005 else: result = func(data, **kwargs) if not isinstance(result, DataFrame): @@ -1006,11 +1007,27 @@ def _apply( f"index and columns as the input" ) - if result.shape != data.shape: + if isinstance(result, Series): + raise ValueError( + f"Function {repr(func)} returned a Series when a DataFrame is required" + ) + msg = ( + "Function {0} created invalid {1} labels.\nUsually, this is the result " + "of the function returning a Series which contains invalid labels, or " + "returning incorrect array shapes which cannot be mapped to labels, " + "possibly due to applying the function along the wrong axis.\n" + "Result {1} has shape: {2}\n" + "Expected {1} shape: {3}" + ) + if not (all(result.index.isin(self.index))): + raise ValueError( + msg.format(repr(func), "index", result.index.shape, data.index.shape) + ) + if not (all(result.columns.isin(self.columns))): raise ValueError( - f"Function {repr(func)} returned the wrong shape.\n" - f"Result has shape: {result.shape}\n" - f"Expected shape: {data.shape}" + msg.format( + repr(func), "columns", result.columns.shape, data.columns.shape + ) ) self._update_ctx(result) return self diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 281170ab6c7cb..6b015ec0d77cd 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -925,24 +925,28 @@ def test_export(self): style2.render() def test_bad_apply_shape(self): - df = DataFrame([[1, 2], [3, 4]]) - msg = "returned the wrong shape" - with pytest.raises(ValueError, match=msg): - df.style._apply(lambda x: "x", subset=pd.IndexSlice[[0, 1], :]) + df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"]) + msg = "returned a Series when a DataFrame is required" with pytest.raises(ValueError, match=msg): - df.style._apply(lambda x: [""], subset=pd.IndexSlice[[0, 1], :]) + df.style._apply(lambda x: "x") - with pytest.raises(ValueError, match=msg): + msg = "created invalid {} labels" + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: [""]) + + with pytest.raises(ValueError, match=msg.format("index")): df.style._apply(lambda x: ["", "", "", ""]) - with pytest.raises(ValueError, match=msg): - df.style._apply(lambda x: ["", "", ""], subset=1) + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["A", "C"]), axis=0) - msg = "Length mismatch: Expected axis has 3 elements" - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=msg.format("columns")): df.style._apply(lambda x: ["", "", ""], axis=1) + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["X", "Z"]), axis=1) + msg = "returned ndarray with wrong shape" with pytest.raises(ValueError, match=msg): df.style._apply(lambda x: np.array([[""], [""]]), axis=None) From c67092a9a884331a12508795c81d1188f36304af Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 10:02:41 +0200 Subject: [PATCH 2/9] add tests --- pandas/tests/io/formats/style/test_style.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 6b015ec0d77cd..fdb1b2ba9d09f 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -583,6 +583,25 @@ def test_apply_axis(self): result._compute() assert result.ctx == expected + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_series_return(self, axis): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + + # test Series return where len(Series) < df.index or df.columns but labels OK + func = lambda s: pd.Series(["color: red;"], index=["Y"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + + # test Series return where labels align but different order + func = lambda s: pd.Series(["color: red;", "color: blue;"], index=["Y", "X"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(0, 0)] == [("color", "blue")] + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + assert result[(axis, 1 - axis)] == [("color", "blue")] + @pytest.mark.parametrize( "slice_", [ From d63073c34b60c81835292cf03874f6f2e71f99ab Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 10:51:39 +0200 Subject: [PATCH 3/9] allow DataFrame returns that do not have same size but valid labels --- pandas/io/formats/style.py | 37 ++++++++------------- pandas/tests/io/formats/style/test_style.py | 22 ++++++++++-- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index ffb9c25808da2..93f6cd70384b5 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -999,35 +999,25 @@ def _apply( f"Expected shape: {data.shape}" ) result = DataFrame(result, index=data.index, columns=data.columns) - elif not ( - result.index.equals(data.index) and result.columns.equals(data.columns) - ): - raise ValueError( - f"Result of {repr(func)} must have identical " - f"index and columns as the input" - ) if isinstance(result, Series): raise ValueError( f"Function {repr(func)} returned a Series when a DataFrame is required" ) msg = ( - "Function {0} created invalid {1} labels.\nUsually, this is the result " - "of the function returning a Series which contains invalid labels, or " - "returning incorrect array shapes which cannot be mapped to labels, " - "possibly due to applying the function along the wrong axis.\n" - "Result {1} has shape: {2}\n" - "Expected {1} shape: {3}" + f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " + f"the result of the function returning a " + f"{'Series' if axis is not None else 'DataFrame'} which contains invalid " + f"labels, or returning incorrect array shapes which cannot be mapped to " + f"labels, possibly due to applying the function along the wrong axis.\n" + "Result {{0}} has shape: {{1}}\n" + "Expected {{0}} shape: {{2}}" ) - if not (all(result.index.isin(self.index))): + if not all(result.index.isin(data.index)): + raise ValueError(msg.format("index", result.index.shape, data.index.shape)) + if not all(result.columns.isin(data.columns)): raise ValueError( - msg.format(repr(func), "index", result.index.shape, data.index.shape) - ) - if not (all(result.columns.isin(self.columns))): - raise ValueError( - msg.format( - repr(func), "columns", result.columns.shape, data.columns.shape - ) + msg.format("columns", result.columns.shape, data.columns.shape) ) self._update_ctx(result) return self @@ -1047,8 +1037,9 @@ def apply( Parameters ---------- func : function - ``func`` should take a Series if ``axis`` in [0,1] and return an object - of same length, also with identical index if the object is a Series. + ``func`` should take a Series if ``axis`` in [0,1] and return a list-like + object of same length, or a Series, not necessarily of same length, with + valid index labels. ``func`` should take a DataFrame if ``axis`` is ``None`` and return either an ndarray with the same shape or a DataFrame with identical columns and index. diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index fdb1b2ba9d09f..5a85d0096cf1f 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -602,6 +602,21 @@ def test_apply_series_return(self, axis): assert result[(1 - axis, axis)] == [("color", "red")] assert result[(axis, 1 - axis)] == [("color", "blue")] + @pytest.mark.parametrize("index", [False, True]) + @pytest.mark.parametrize("columns", [False, True]) + def test_apply_dataframe_return(self, index, columns): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + idxs = ["X", "Y"] if index else ["Y"] + cols = ["X", "Y"] if columns else ["Y"] + df_styles = DataFrame("color: red;", index=idxs, columns=cols) + result = df.style.apply(lambda x: df_styles, axis=None)._compute().ctx + + assert result[(1, 1)] == [("color", "red")] # (Y,Y) styles always present + assert (result[(0, 1)] == [("color", "red")]) is index # (X,Y) only if index + assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols + assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X) + @pytest.mark.parametrize( "slice_", [ @@ -982,12 +997,13 @@ def f(x): with pytest.raises(TypeError, match=msg): df.style._apply(f, axis=None) - def test_apply_bad_labels(self): + @pytest.mark.parametrize("axis", ["index", "columns"]) + def test_apply_bad_labels(self, axis): def f(x): - return DataFrame(index=[1, 2], columns=["a", "b"]) + return DataFrame(**{axis: ["bad", "labels"]}) df = DataFrame([[1, 2], [3, 4]]) - msg = "must have identical index and columns as the input" + msg = f"created invalid {axis} labels." with pytest.raises(ValueError, match=msg): df.style._apply(f, axis=None) From 53bdc3806427555bc96f9a051987fce691d723a1 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 10:55:26 +0200 Subject: [PATCH 4/9] allow DataFrame returns that do not have same size but valid labels --- pandas/io/formats/style.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 93f6cd70384b5..d5bde72620255 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1039,13 +1039,15 @@ def apply( func : function ``func`` should take a Series if ``axis`` in [0,1] and return a list-like object of same length, or a Series, not necessarily of same length, with - valid index labels. + valid index labels considering ``subset``. ``func`` should take a DataFrame if ``axis`` is ``None`` and return either - an ndarray with the same shape or a DataFrame with identical columns and - index. + an ndarray with the same shape or a DataFrame, not necessarily of the same + shape, with valid index and columns labels considering ``subset``. .. versionchanged:: 1.3.0 + .. versionchanged:: 1.4.0 + axis : {0 or 'index', 1 or 'columns', None}, default 0 Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once From b3b6f0a86923c188b5b714e33cf7f38e067750a9 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 12:58:36 +0200 Subject: [PATCH 5/9] more descript error msgs --- pandas/io/formats/style.py | 4 +++- pandas/tests/io/formats/style/test_style.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d5bde72620255..9cb3a38f802a7 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1002,7 +1002,9 @@ def _apply( if isinstance(result, Series): raise ValueError( - f"Function {repr(func)} returned a Series when a DataFrame is required" + f"Function {repr(func)} resulted in the apply method collapsing to a " + f"Series.\nUsually this is the result of a function returning a single" + f"value, instead of list-like." ) msg = ( f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 5a85d0096cf1f..aee2335149734 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -961,7 +961,7 @@ def test_export(self): def test_bad_apply_shape(self): df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"]) - msg = "returned a Series when a DataFrame is required" + msg = "resulted in the apply method collapsing to a Series." with pytest.raises(ValueError, match=msg): df.style._apply(lambda x: "x") From b6fc1b09a66fb0f0509bfc0122316a22cd48c00f Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 13:01:58 +0200 Subject: [PATCH 6/9] more descript error msgs --- pandas/io/formats/style.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9cb3a38f802a7..b4b28f6b59ee4 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1003,15 +1003,16 @@ def _apply( if isinstance(result, Series): raise ValueError( f"Function {repr(func)} resulted in the apply method collapsing to a " - f"Series.\nUsually this is the result of a function returning a single" - f"value, instead of list-like." + f"Series.\nUsually, this is the result of the function returning a " + f"single value, instead of list-like." ) msg = ( f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " f"the result of the function returning a " f"{'Series' if axis is not None else 'DataFrame'} which contains invalid " - f"labels, or returning incorrect array shapes which cannot be mapped to " - f"labels, possibly due to applying the function along the wrong axis.\n" + f"labels, or returning an incorrectly shaped, list-like object which " + f"cannot be mapped to labels, possibly due to applying the function along " + f"the wrong axis.\n" "Result {{0}} has shape: {{1}}\n" "Expected {{0}} shape: {{2}}" ) From 681c0c80c6df84c6a41e8bb082cf9d248a27b1bb Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 15 Jun 2021 13:23:48 +0200 Subject: [PATCH 7/9] whats new --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/io/formats/style.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 166ea2f0d4164..2c00e555877a6 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -207,7 +207,7 @@ ExtensionArray Styler ^^^^^^ -- +- Bug in :meth:`Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels. - Other diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b4b28f6b59ee4..f4f21b3483055 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1013,8 +1013,8 @@ def _apply( f"labels, or returning an incorrectly shaped, list-like object which " f"cannot be mapped to labels, possibly due to applying the function along " f"the wrong axis.\n" - "Result {{0}} has shape: {{1}}\n" - "Expected {{0}} shape: {{2}}" + f"Result {{0}} has shape: {{1}}\n" + f"Expected {{0}} shape: {{2}}" ) if not all(result.index.isin(data.index)): raise ValueError(msg.format("index", result.index.shape, data.index.shape)) From ddbcd6b7bbe076f8f84c2bffc4b66924cb782dfc Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 15 Aug 2021 12:20:08 +0200 Subject: [PATCH 8/9] issue number (jreback) --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e8f42d210284d..a7b847d0fd5f5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -349,7 +349,7 @@ ExtensionArray Styler ^^^^^^ -- Bug in :meth:`Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels. +- Bug in :meth:`Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels (:issue:`13657`, :issue:`42014`) - Other From fc55b5e5e55f0ddce1ae316eadb6fdbdaceeeb55 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 15 Aug 2021 12:57:01 +0200 Subject: [PATCH 9/9] example (jreback) --- pandas/io/formats/style.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 0cf58b1f10926..2c0c572b21a86 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1250,6 +1250,13 @@ def apply( >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) ... # doctest: +SKIP + Using a function which returns a Series / DataFrame of unequal length but + containing valid index labels + + >>> df = pd.DataFrame([[1, 2], [3, 4], [4, 6]], index=["A1", "A2", "Total"]) + >>> total_style = pd.Series("font-weight: bold;", index=["Total"]) + >>> df.style.apply(lambda s: total_style) # doctest: +SKIP + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for more details. """