diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 84fdc4600dc63..74f69ef73b822 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -365,6 +365,8 @@ Styler - Minor bug in :class:`.Styler` where the ``uuid`` at initialization maintained a floating underscore (:issue:`43037`) - Bug in :meth:`.Styler.to_html` where the ``Styler`` object was updated if the ``to_html`` method was called with some args (:issue:`43034`) - Bug in :meth:`.Styler.copy` where ``uuid`` was not previously copied (:issue:`40675`) +- Bug in :meth:`.Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels (:issue:`13657`, :issue:`42014`) +- Other ^^^^^ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index eff0bd9637859..81bd14629cfd3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1024,7 +1024,7 @@ def _update_ctx(self, attrs: DataFrame) -> None: for cn in attrs.columns: for rn, c in attrs[[cn]].itertuples(): - if not c: + if not c or pd.isna(c): continue css_list = maybe_convert_css_to_tuples(c) i, j = self.index.get_loc(rn), self.columns.get_loc(cn) @@ -1148,9 +1148,10 @@ def _apply( subset = slice(None) if subset is None else subset subset = non_reducing_slice(subset) data = self.data.loc[subset] - if axis is not None: - result = data.apply(func, axis=axis, result_type="expand", **kwargs) - result.columns = data.columns + if axis in [0, "index"]: + result = data.apply(func, axis=0, **kwargs) + elif axis in [1, "columns"]: + result = data.T.apply(func, axis=0, **kwargs).T # see GH 42005 else: result = func(data, **kwargs) if not isinstance(result, DataFrame): @@ -1166,19 +1167,28 @@ def _apply( f"Expected shape: {data.shape}" ) result = DataFrame(result, index=data.index, columns=data.columns) - elif not ( - result.index.equals(data.index) and result.columns.equals(data.columns) - ): - raise ValueError( - f"Result
of {repr(func)} must have identical " - f"index and columns as the input" - ) - if result.shape != data.shape: + if isinstance(result, Series): raise ValueError( - f"Function {repr(func)} returned the wrong shape.\n" - f"Result has shape: {result.shape}\n" - f"Expected shape: {data.shape}" + f"Function {repr(func)} resulted in the apply method collapsing to a " + f"Series.\nUsually, this is the result of the function returning a " + f"single value, instead of list-like." + ) + msg = ( + f"Function {repr(func)} created invalid {{0}} labels.\nUsually, this is " + f"the result of the function returning a " + f"{'Series' if axis is not None else 'DataFrame'} which contains invalid " + f"labels, or returning an incorrectly shaped, list-like object which " + f"cannot be mapped to labels, possibly due to applying the function along " + f"the wrong axis.\n" + f"Result {{0}} has shape: {{1}}\n" + f"Expected {{0}} shape: {{2}}" + ) + if not all(result.index.isin(data.index)): + raise ValueError(msg.format("index", result.index.shape, data.index.shape)) + if not all(result.columns.isin(data.columns)): + raise ValueError( + msg.format("columns", result.columns.shape, data.columns.shape) ) self._update_ctx(result) return self @@ -1198,14 +1208,17 @@ def apply( Parameters ---------- func : function - ``func`` should take a Series if ``axis`` in [0,1] and return an object - of same length, also with identical index if the object is a Series. + ``func`` should take a Series if ``axis`` in [0,1] and return a list-like + object of same length, or a Series, not necessarily of same length, with + valid index labels considering ``subset``. ``func`` should take a DataFrame if ``axis`` is ``None`` and return either - an ndarray with the same shape or a DataFrame with identical columns and - index. + an ndarray with the same shape or a DataFrame, not necessarily of the same + shape, with valid index and columns labels considering ``subset``. .. versionchanged:: 1.3.0 + .. 
versionchanged:: 1.4.0 + axis : {0 or 'index', 1 or 'columns', None}, default 0 Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once @@ -1260,6 +1273,13 @@ def apply( >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) ... # doctest: +SKIP + Using a function which returns a Series / DataFrame of unequal length but + containing valid index labels + + >>> df = pd.DataFrame([[1, 2], [3, 4], [4, 6]], index=["A1", "A2", "Total"]) + >>> total_style = pd.Series("font-weight: bold;", index=["Total"]) + >>> df.style.apply(lambda s: total_style) # doctest: +SKIP + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for more details. """ diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index a69d65a57bca0..5022a1eaa2c6e 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -550,6 +550,40 @@ def test_apply_axis(self): result._compute() assert result.ctx == expected + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_series_return(self, axis): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + + # test Series return where len(Series) < df.index or df.columns but labels OK + func = lambda s: pd.Series(["color: red;"], index=["Y"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + + # test Series return where labels align but different order + func = lambda s: pd.Series(["color: red;", "color: blue;"], index=["Y", "X"]) + result = df.style.apply(func, axis=axis)._compute().ctx + assert result[(0, 0)] == [("color", "blue")] + assert result[(1, 1)] == [("color", "red")] + assert result[(1 - axis, axis)] == [("color", "red")] + assert result[(axis, 1 - axis)] == [("color", "blue")] + + 
@pytest.mark.parametrize("index", [False, True]) + @pytest.mark.parametrize("columns", [False, True]) + def test_apply_dataframe_return(self, index, columns): + # GH 42014 + df = DataFrame([[1, 2], [3, 4]], index=["X", "Y"], columns=["X", "Y"]) + idxs = ["X", "Y"] if index else ["Y"] + cols = ["X", "Y"] if columns else ["Y"] + df_styles = DataFrame("color: red;", index=idxs, columns=cols) + result = df.style.apply(lambda x: df_styles, axis=None)._compute().ctx + + assert result[(1, 1)] == [("color", "red")] # (Y,Y) styles always present + assert (result[(0, 1)] == [("color", "red")]) is index # (X,Y) only if index + assert (result[(1, 0)] == [("color", "red")]) is columns # (Y,X) only if cols + assert (result[(0, 0)] == [("color", "red")]) is (index and columns) # (X,X) + @pytest.mark.parametrize( "slice_", [ @@ -794,24 +828,28 @@ def test_export(self): style2.to_html() def test_bad_apply_shape(self): - df = DataFrame([[1, 2], [3, 4]]) - msg = "returned the wrong shape" - with pytest.raises(ValueError, match=msg): - df.style._apply(lambda x: "x", subset=pd.IndexSlice[[0, 1], :]) + df = DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["X", "Y"]) + msg = "resulted in the apply method collapsing to a Series." 
with pytest.raises(ValueError, match=msg): - df.style._apply(lambda x: [""], subset=pd.IndexSlice[[0, 1], :]) + df.style._apply(lambda x: "x") - with pytest.raises(ValueError, match=msg): + msg = "created invalid {} labels" + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: [""]) + + with pytest.raises(ValueError, match=msg.format("index")): df.style._apply(lambda x: ["", "", "", ""]) - with pytest.raises(ValueError, match=msg): - df.style._apply(lambda x: ["", "", ""], subset=1) + with pytest.raises(ValueError, match=msg.format("index")): + df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["A", "C"]), axis=0) - msg = "Length mismatch: Expected axis has 3 elements" - with pytest.raises(ValueError, match=msg): + with pytest.raises(ValueError, match=msg.format("columns")): df.style._apply(lambda x: ["", "", ""], axis=1) + with pytest.raises(ValueError, match=msg.format("columns")): + df.style._apply(lambda x: pd.Series(["a:v;", ""], index=["X", "Z"]), axis=1) + msg = "returned ndarray with wrong shape" with pytest.raises(ValueError, match=msg): df.style._apply(lambda x: np.array([[""], [""]]), axis=None) @@ -828,12 +866,13 @@ def f(x): with pytest.raises(TypeError, match=msg): df.style._apply(f, axis=None) - def test_apply_bad_labels(self): + @pytest.mark.parametrize("axis", ["index", "columns"]) + def test_apply_bad_labels(self, axis): def f(x): - return DataFrame(index=[1, 2], columns=["a", "b"]) + return DataFrame(**{axis: ["bad", "labels"]}) df = DataFrame([[1, 2], [3, 4]]) - msg = "must have identical index and columns as the input" + msg = f"created invalid {axis} labels." with pytest.raises(ValueError, match=msg): df.style._apply(f, axis=None)