From 7d9280ae36e76d2be8eb2a302b6db38a7df5afa5 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Mon, 25 Jul 2022 19:02:36 +0200 Subject: [PATCH 1/7] ENH: add basic method --- pandas/io/formats/style_render.py | 96 ++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 65720e675a77a..fbfb89759cee4 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -8,6 +8,7 @@ Callable, DefaultDict, Dict, + Iterable, List, Optional, Sequence, @@ -22,7 +23,10 @@ from pandas._config import get_option from pandas._libs import lib -from pandas._typing import Level +from pandas._typing import ( + Axis, + Level, +) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import ( @@ -1345,6 +1349,96 @@ def format_index( return self + def relabel_index( + self, + labels: Iterable, + axis: Axis = 0, + level: Level | list[Level] | None = None, + ) -> StylerRenderer: + r""" + Relabel the index keys to display a set of specified values. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + labels : Iterable + New labels to display. Must have same length as the original. + axis : {"index", 0, "columns", 1} + Apply to the index or columns. + level : int, str, list, optional + The level(s) over which to apply the new labels. + If `None` will apply to the last level(s) of an Index or MultiIndex. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.format_index: Format the text display value of index or column headers. 
+ + Notes + ----- + None + + Examples + -------- + Using ``na_rep`` and ``precision`` with the default ``formatter`` + + >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0]) + >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP + 2.000 MISS 4.000 + 0 1 2 3 + + Using a ``formatter`` specification on consistent dtypes in a level + + >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP + 2.00 MISS 4.00 + 0 1 2 3 + + """ + axis = self.data._get_axis_number(axis) + if axis == 0: + display_funcs_, obj = self._display_funcs_index, self.index + hidden_labels, hidden_lvls = self.hidden_rows, self.hide_index_ + else: + display_funcs_, obj = self._display_funcs_columns, self.columns + hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_ + visible_len = len(obj) - len(set(hidden_labels)) + visible_lvls = obj.nlevels - sum(hidden_lvls) + if len(labels) != visible_len: + raise ValueError( + "``labels`` must be of length equal to the number of " + "visible labels along ``axis``." 
+ ) + if level is None and visible_lvls > 1: + error_msg = ( + f"``labels`` specified do not contain the same " + f"number of visible levels ({visible_lvls}) as the specified ``axis``: " + ) + try: + if len(labels[0]) != visible_lvls: + raise ValueError(error_msg) + except TypeError: + raise ValueError(error_msg) + level = [i for i in range(obj.nlevels) if not hidden_lvls[i]] + levels_ = refactor_levels(level, obj) + + def alias_(x, value): + return value + + for ai, i in enumerate([i for i in range(len(obj)) if i not in hidden_labels]): + if len(levels_) == 1: + idx = (i, levels_[0]) if axis == 0 else (levels_[0], i) + display_funcs_[idx] = partial(alias_, value=labels[ai]) + else: + for aj, lvl in enumerate(levels_): + idx = (i, lvl) if axis == 0 else (lvl, i) + display_funcs_[idx] = partial(alias_, value=labels[ai][aj]) + + return self + def _element( html_element: str, From bac2032db80132d999b09772b0d6cd26095de145 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Tue, 26 Jul 2022 17:28:35 +0200 Subject: [PATCH 2/7] TEST: add proper tests --- pandas/io/formats/style_render.py | 128 ++++++++++++++----- pandas/tests/io/formats/style/test_format.py | 57 +++++++++ 2 files changed, 154 insertions(+), 31 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index fbfb89759cee4..d9400d696382a 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -8,7 +8,6 @@ Callable, DefaultDict, Dict, - Iterable, List, Optional, Sequence, @@ -1351,24 +1350,25 @@ def format_index( def relabel_index( self, - labels: Iterable, + labels: Sequence | Index, axis: Axis = 0, level: Level | list[Level] | None = None, ) -> StylerRenderer: r""" - Relabel the index keys to display a set of specified values. + Relabel the index, or column header, keys to display a set of specified values. .. versionadded:: 1.5.0 Parameters ---------- - labels : Iterable - New labels to display. 
Must have same length as the original. + labels : list-like or Index + New labels to display. Must have same length as the underlying values not + hidden. axis : {"index", 0, "columns", 1} Apply to the index or columns. level : int, str, list, optional - The level(s) over which to apply the new labels. - If `None` will apply to the last level(s) of an Index or MultiIndex. + The level(s) over which to apply the new labels. If `None` will apply + to all levels of an Index or MultiIndex which are not hidden. Returns ------- @@ -1377,26 +1377,98 @@ def relabel_index( See Also -------- Styler.format_index: Format the text display value of index or column headers. + Styler.hide: Hide the index, column headers, or specified data from display. Notes ----- - None + As part of Styler, this method allows the display of an index to be + completely user-specified without affecting the underlying DataFrame data, + index, or column headers. This means that the flexibility of indexing is + maintained whilst the final display is customisable. - Examples - -------- - Using ``na_rep`` and ``precision`` with the default ``formatter`` + Since Styler is designed to be progressively constructed with method chaining, + this method is adapted to react to the **currently specified hidden elements**. + This is useful because it means one does not have to specify all the new + labels if the majority of an index, or column headers, have already been hidden. + The following produce equivalent display (note the length of ``labels`` in + each case). - >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0]) - >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP - 2.000 MISS 4.000 - 0 1 2 3 + .. 
code-block:: python - Using a ``formatter`` specification on consistent dtypes in a level + df = pd.DataFrame({"col": ["a", "b", "c"]}) + df.style.relabel_index(["A", "B", "C"]).hide([0,1]) - >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP - 2.00 MISS 4.00 - 0 1 2 3 + .. code-block:: python + + df = pd.DataFrame({"col": ["a", "b", "c"]}) + df.style.hide([0,1]).relabel_index(["C"]) + + This method should be used, rather than :meth:`Styler.format_index`, in one of + the following cases (see examples): + + - A specified set of labels are required which are not a function of the + underlying index keys. + - The function of the underlying index keys requires a counter variable, + such as those available upon enumeration. + Examples + -------- + Basic use + + >>> df = pd.DataFrame({"col": ["a", "b", "c"]}) + >>> df.style.relabel_index(["A", "B", "C"]) # doctest: +SKIP + col + A a + B b + C c + + Chaining with pre-hidden elements + + >>> df.style.hide([0,1]).relabel_index(["C"]) # doctest: +SKIP + col + C c + + Using a MultiIndex + + >>> midx = pd.MultiIndex.from_product([[0, 1], [0, 1], [0, 1]]) + >>> df = pd.DataFrame({"col": list(range(8))}, index=midx) + >>> styler = df.style # doctest: +SKIP + col + 0 0 0 0 + 1 1 + 1 0 2 + 1 3 + 1 0 0 4 + 1 5 + 1 0 6 + 1 7 + >>> styler.hide((midx.get_level_values(0)==0)|(midx.get_level_values(1)==0)) + >>> styler.hide(level=[0,1]) + >>> styler.relabel_index(["binary6", "binary7"]) + col + binary6 6 + binary7 7 + + We can also achieve the above by indexing first and then re-labeling + + >>> styler = df.loc[[(1,1,0), (1,1,1)]].style + >>> styler.hide(level=[0,1]).relabel_index(["binary6", "binary7"]) + col + binary6 6 + binary7 7 + + Defining a formatting function which uses an enumeration counter. 
Also note + that the value of the index key is passed in the case of string labels so it + can also be inserted into the label, using curly brackets (or double curly + brackets if the string is pre-formatted), + + >>> df = pd.DataFrame({"samples": np.random.rand(10)}) + >>> styler = df.loc[np.random.randint(0,10,3)].style + >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)]) + samples + sample1 (5) 0.315811 + sample2 (0) 0.495941 + sample3 (2) 0.067946 """ axis = self.data._get_axis_number(axis) if axis == 0: @@ -1406,26 +1478,20 @@ def relabel_index( display_funcs_, obj = self._display_funcs_columns, self.columns hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_ visible_len = len(obj) - len(set(hidden_labels)) - visible_lvls = obj.nlevels - sum(hidden_lvls) + # visible_lvls = obj.nlevels - sum(hidden_lvls) if len(labels) != visible_len: raise ValueError( "``labels`` must be of length equal to the number of " - "visible labels along ``axis``." + f"visible labels along ``axis`` ({visible_len})."
) - if level is None and visible_lvls > 1: - error_msg = ( - f"``labels`` specified do not contain the same " - f"number of visible levels ({visible_lvls}) as the specified ``axis``: " - ) - try: - if len(labels[0]) != visible_lvls: - raise ValueError(error_msg) - except TypeError: - raise ValueError(error_msg) + + if level is None: level = [i for i in range(obj.nlevels) if not hidden_lvls[i]] levels_ = refactor_levels(level, obj) def alias_(x, value): + if isinstance(value, str): + return value.format(x) return value for ai, i in enumerate([i for i in range(len(obj)) if i not in hidden_labels]): diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index a52c679e16ad5..0b114ea128b0b 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -30,6 +30,20 @@ def styler(df): return Styler(df, uuid_len=0) +@pytest.fixture +def df_multi(): + return DataFrame( + data=np.arange(16).reshape(4, 4), + columns=MultiIndex.from_product([["A", "B"], ["a", "b"]]), + index=MultiIndex.from_product([["X", "Y"], ["x", "y"]]), + ) + + +@pytest.fixture +def styler_multi(df_multi): + return Styler(df_multi, uuid_len=0) + + def test_display_format(styler): ctx = styler.format("{:0.1f}")._translate(True, True) assert all(["display_value" in c for c in row] for row in ctx["body"]) @@ -442,3 +456,46 @@ def test_boolean_format(): ctx = df.style._translate(True, True) assert ctx["body"][0][1]["display_value"] is True assert ctx["body"][0][2]["display_value"] is False + + +@pytest.mark.parametrize( + "hide, labels", + [ + (False, [1, 2]), + (True, [1, 2, 3, 4]), + ], +) +def test_relabel_raise_length(styler_multi, hide, labels): + if hide: + styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + with pytest.raises(ValueError, match="``labels`` must be of length equal"): + styler_multi.relabel_index(labels=labels) + + +def test_relabel_index(styler_multi): + labels = [(1, 2), (3, 4)] + 
styler_multi.hide(axis=0, subset=[("X", "x"), ("Y", "y")]) + styler_multi.relabel_index(labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "X", "display_value": 1}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": 2}.items() <= ctx["body"][0][1].items() + assert {"value": "Y", "display_value": 3}.items() <= ctx["body"][1][0].items() + assert {"value": "x", "display_value": 4}.items() <= ctx["body"][1][1].items() + + +def test_relabel_columns(styler_multi): + labels = [(1, 2), (3, 4)] + styler_multi.hide(axis=1, subset=[("A", "a"), ("B", "b")]) + styler_multi.relabel_index(axis=1, labels=labels) + ctx = styler_multi._translate(True, True) + assert {"value": "A", "display_value": 1}.items() <= ctx["head"][0][3].items() + assert {"value": "B", "display_value": 3}.items() <= ctx["head"][0][4].items() + assert {"value": "b", "display_value": 2}.items() <= ctx["head"][1][3].items() + assert {"value": "a", "display_value": 4}.items() <= ctx["head"][1][4].items() + + +def test_relabel_roundtrip(styler): + styler.relabel_index(["{}", "{}"]) + ctx = styler._translate(True, True) + assert {"value": "x", "display_value": "x"}.items() <= ctx["body"][0][0].items() + assert {"value": "y", "display_value": "y"}.items() <= ctx["body"][1][0].items() From d59138e3757f955f2389d4306c3c68c48d3d74b5 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Tue, 26 Jul 2022 18:04:09 +0200 Subject: [PATCH 3/7] DOC: edit --- doc/source/reference/style.rst | 1 + pandas/io/formats/style_render.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 77e1b0abae0c4..5144f12fa373a 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -41,6 +41,7 @@ Style application Styler.applymap_index Styler.format Styler.format_index + Styler.relabel_index Styler.hide Styler.concat Styler.set_td_classes diff --git 
a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index d9400d696382a..7b914dc5e0168 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1395,11 +1395,10 @@ def relabel_index( .. code-block:: python + # relabel first, then hide df = pd.DataFrame({"col": ["a", "b", "c"]}) df.style.relabel_index(["A", "B", "C"]).hide([0,1]) - - .. code-block:: python - + # hide first, then relabel df = pd.DataFrame({"col": ["a", "b", "c"]}) df.style.hide([0,1]).relabel_index(["C"]) From c6256f8398d678c394f3e77c7b248adb9accbecf Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Tue, 26 Jul 2022 18:06:14 +0200 Subject: [PATCH 4/7] DOC: edit --- doc/source/whatsnew/v1.5.0.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7f07187e34c78..abcbaf8ebf388 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -44,6 +44,9 @@ e.g. totals and counts etc. (:issue:`43875`, :issue:`46186`) Additionally there is an alternative output method :meth:`.Styler.to_string`, which allows using the Styler's formatting methods to create, for example, CSVs (:issue:`44502`). 
+A new feature :meth:`.Styler.relabel_index` is also made available to provide full customisation of the display of +index or column headers (:issue:`XXXXX`) + Minor feature improvements are: - Adding the ability to render ``border`` and ``border-{side}`` CSS properties in Excel (:issue:`42276`) From 8e5fd7c50849739bcce650f43ea621f504be78ba Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Tue, 26 Jul 2022 20:25:28 +0200 Subject: [PATCH 5/7] DOC: skip doc test --- pandas/io/formats/style_render.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 7b914dc5e0168..4c91d51e89165 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1442,8 +1442,9 @@ def relabel_index( 1 0 6 1 7 >>> styler.hide((midx.get_level_values(0)==0)|(midx.get_level_values(1)==0)) - >>> styler.hide(level=[0,1]) - >>> styler.relabel_index(["binary6", "binary7"]) + ... # doctest: +SKIP + >>> styler.hide(level=[0,1]) # doctest: +SKIP + >>> styler.relabel_index(["binary6", "binary7"]) # doctest: +SKIP col binary6 6 binary7 7 @@ -1452,6 +1453,7 @@ def relabel_index( >>> styler = df.loc[[(1,1,0), (1,1,1)]].style >>> styler.hide(level=[0,1]).relabel_index(["binary6", "binary7"]) + ... # doctest: +SKIP col binary6 6 binary7 7 @@ -1464,6 +1466,7 @@ def relabel_index( >>> df = pd.DataFrame({"samples": np.random.rand(10)}) >>> styler = df.loc[np.random.randint(0,10,3)].style >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)]) + ... 
# doctest: +SKIP samples sample1 (5) 0.315811 sample2 (0) 0.495941 From aa7b2a8a40cde9feee28b0622718de85152ce732 Mon Sep 17 00:00:00 2001 From: JHM Darbyshire <24256554+attack68@users.noreply.github.com> Date: Wed, 27 Jul 2022 21:49:39 +0200 Subject: [PATCH 6/7] Update doc/source/whatsnew/v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index abcbaf8ebf388..d7117095d2321 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -45,7 +45,7 @@ Additionally there is an alternative output method :meth:`.Styler.to_string`, which allows using the Styler's formatting methods to create, for example, CSVs (:issue:`44502`). A new feature :meth:`.Styler.relabel_index` is also made available to provide full customisation of the display of -index or column headers (:issue:`XXXXX`) +index or column headers (:issue:`47864`) Minor feature improvements are: From 5921b901b340cb5416bc4fb2ea48d2e92b8f7f27 Mon Sep 17 00:00:00 2001 From: JHM Darbyshire <24256554+attack68@users.noreply.github.com> Date: Mon, 1 Aug 2022 22:36:56 +0200 Subject: [PATCH 7/7] Update pandas/io/formats/style_render.py --- pandas/io/formats/style_render.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 4c91d51e89165..d0a1cb6781e9f 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1480,7 +1480,6 @@ def relabel_index( display_funcs_, obj = self._display_funcs_columns, self.columns hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_ visible_len = len(obj) - len(set(hidden_labels)) - # visible_lvls = obj.nlevels - sum(hidden_lvls) if len(labels) != visible_len: raise ValueError( "``labels`` must be of length equal to the number of "