diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index c476e33b8ddde..aa6bf44547040 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -296,6 +296,22 @@ For instance, cheese.melt(id_vars=['first', 'last']) cheese.melt(id_vars=['first', 'last'], var_name='quantity') +When transforming a DataFrame using :func:`~pandas.melt`, the index will be ignored. The original index values can be kept by setting the ``ignore_index`` parameter to ``False`` (default is ``True``). Note, however, that the index values will then be duplicated, since each original row is repeated once per melted column. + +.. versionadded:: 1.1.0 + +.. ipython:: python + + index = pd.MultiIndex.from_tuples([('person', 'A'), ('person', 'B')]) + cheese = pd.DataFrame({'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 6.0], + 'weight': [130, 150]}, + index=index) + cheese + cheese.melt(id_vars=['first', 'last']) + cheese.melt(id_vars=['first', 'last'], ignore_index=False) + Another way to transform is to use the :func:`~pandas.wide_to_long` panel data convenience function. It is less flexible than :func:`~pandas.melt`, but more user-friendly. diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 75f406d908c73..73b14b83d8444 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -287,6 +287,7 @@ Other enhancements This can be used to set a custom compression level, e.g., ``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1}`` (:issue:`33196`) +- :meth:`DataFrame.melt` and :func:`melt` have gained an ``ignore_index`` (default ``True``) argument that, if set to ``False``, prevents the index from being dropped (:issue:`17440`). 
- :meth:`Series.update` now accepts objects that can be coerced to a :class:`Series`, such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`) - :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`) @@ -1143,3 +1144,4 @@ Other Contributors ~~~~~~~~~~~~ + diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 102c457f94a95..9c223d66b727b 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -6,7 +6,6 @@ from pandas._config import option_context -from pandas._libs import reduction as libreduction from pandas._typing import Axis from pandas.util._decorators import cache_readonly diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 521d16ac0b905..b40c6b4927e97 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2140,7 +2140,7 @@ def to_stata( from pandas.io.stata import StataWriter117 as statawriter # type: ignore else: # versions 118 and 119 # mypy: Name 'statawriter' already defined (possibly by an import) - from pandas.io.stata import StataWriterUTF8 as statawriter # type:ignore + from pandas.io.stata import StataWriterUTF8 as statawriter # type: ignore kwargs: Dict[str, Any] = {} if version is None or version >= 117: @@ -7086,6 +7086,7 @@ def melt( var_name=None, value_name="value", col_level=None, + ignore_index=True, ) -> "DataFrame": return melt( @@ -7095,6 +7096,7 @@ def melt( var_name=var_name, value_name=value_name, col_level=col_level, + ignore_index=ignore_index, ) # ---------------------------------------------------------------------- diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index cd0619738677d..caf9e2fd7be57 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -13,6 +13,7 @@ import pandas.core.common as com from pandas.core.indexes.api import 
Index, MultiIndex from pandas.core.reshape.concat import concat +from pandas.core.reshape.util import _tile_compat from pandas.core.shared_docs import _shared_docs from pandas.core.tools.numeric import to_numeric @@ -31,8 +32,8 @@ def melt( var_name=None, value_name="value", col_level=None, + ignore_index: bool = True, ) -> "DataFrame": - # TODO: what about the existing index? # If multiindex, gather names of columns on all level for checking presence # of `id_vars` and `value_vars` if isinstance(frame.columns, MultiIndex): @@ -121,7 +122,12 @@ def melt( # asanyarray will keep the columns as an Index mdata[col] = np.asanyarray(frame.columns._get_level_values(i)).repeat(N) - return frame._constructor(mdata, columns=mcolumns) + result = frame._constructor(mdata, columns=mcolumns) + + if not ignore_index: + result.index = _tile_compat(frame.index, K) + + return result @deprecate_kwarg(old_arg_name="label", new_arg_name=None) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 1894f551afea5..b81942f062b19 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -28,6 +28,11 @@ Name to use for the 'value' column. col_level : int or str, optional If columns are a MultiIndex then use this level to melt. + ignore_index : bool, default True + If True, original index is ignored. If False, the original index is retained. + Index labels will be repeated as necessary. + + .. 
versionadded:: 1.1.0 Returns ------- @@ -78,6 +83,17 @@ 1 b B 3 2 c B 5 + Original index values can be kept around: + + >>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 0 a C 2 + 1 b C 4 + 2 c C 6 + If you have multi-index columns: >>> df.columns = [list('ABC'), list('DEF')] diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 000a6354277ab..923595038c5eb 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -357,6 +357,47 @@ def test_melt_mixed_int_str_value_vars(self): expected = DataFrame({"variable": [0, "a"], "value": ["foo", "bar"]}) tm.assert_frame_equal(result, expected) + def test_ignore_index(self): + # GH 17440 + df = DataFrame({"foo": [0], "bar": [1]}, index=["first"]) + result = melt(df, ignore_index=False) + expected = DataFrame( + {"variable": ["foo", "bar"], "value": [0, 1]}, index=["first", "first"] + ) + tm.assert_frame_equal(result, expected) + + def test_ignore_multiindex(self): + # GH 17440 + index = pd.MultiIndex.from_tuples( + [("first", "second"), ("first", "third")], names=["baz", "foobar"] + ) + df = DataFrame({"foo": [0, 1], "bar": [2, 3]}, index=index) + result = melt(df, ignore_index=False) + + expected_index = pd.MultiIndex.from_tuples( + [("first", "second"), ("first", "third")] * 2, names=["baz", "foobar"] + ) + expected = DataFrame( + {"variable": ["foo"] * 2 + ["bar"] * 2, "value": [0, 1, 2, 3]}, + index=expected_index, + ) + + tm.assert_frame_equal(result, expected) + + def test_ignore_index_name_and_type(self): + # GH 17440 + index = pd.Index(["foo", "bar"], dtype="category", name="baz") + df = DataFrame({"x": [0, 1], "y": [2, 3]}, index=index) + result = melt(df, ignore_index=False) + + expected_index = pd.Index(["foo", "bar"] * 2, dtype="category", name="baz") + expected = DataFrame( + {"variable": ["x", "x", "y", "y"], "value": [0, 1, 2, 3]}, + index=expected_index, + ) + + 
tm.assert_frame_equal(result, expected) + class TestLreshape: def test_pairs(self):