From f62070ea75ad5e04ea764f8426cccd8f012fb095 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Wed, 7 Sep 2022 13:29:35 +0800 Subject: [PATCH 1/4] ENH: Add index param to df.to_dict --- pandas/core/frame.py | 90 ++++++++++++++++------ pandas/tests/frame/methods/test_to_dict.py | 28 +++++++ 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4cdd62b038485..ffebaae4f0e7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1864,6 +1864,7 @@ def to_dict( "dict", "list", "series", "split", "tight", "records", "index" ] = "dict", into: type[dict] = dict, + index: bool = True, ) -> dict | list[dict]: """ Convert the DataFrame to a dictionary. @@ -1900,6 +1901,13 @@ def to_dict( instance of the mapping type you want. If you want a collections.defaultdict, you must pass it initialized. + index : bool, default True + Whether to include the index item (and index_names item if `orient` + is 'tight') in the returned dictionary. Can only be ``False`` + when `orient` is 'split' or 'tight'. + + .. 
versionadded:: 1.5.0 + Returns ------- dict, list or collections.abc.Mapping @@ -2005,6 +2013,11 @@ def to_dict( elif orient.startswith("i"): orient = "index" + if not index and orient not in ["split", "tight"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split' or 'tight'" + ) + if orient == "dict": return into_c((k, v.to_dict(into)) for k, v in self.items()) @@ -2014,36 +2027,65 @@ def to_dict( ) elif orient == "split": - return into_c( - ( - ("index", self.index.tolist()), - ("columns", self.columns.tolist()), + if index: + return into_c( ( - "data", - [ - list(map(maybe_box_native, t)) - for t in self.itertuples(index=False, name=None) - ], - ), + ("index", self.index.tolist()), + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ) + ) + else: + return into_c( + ( + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ) ) - ) elif orient == "tight": - return into_c( - ( - ("index", self.index.tolist()), - ("columns", self.columns.tolist()), + if index: + return into_c( ( - "data", - [ - list(map(maybe_box_native, t)) - for t in self.itertuples(index=False, name=None) - ], - ), - ("index_names", list(self.index.names)), - ("column_names", list(self.columns.names)), + ("index", self.index.tolist()), + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ("index_names", list(self.index.names)), + ("column_names", list(self.columns.names)), + ) + ) + else: + return into_c( + ( + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ("column_names", list(self.columns.names)), + ) ) - ) elif orient == "series": return into_c((k, v) for k, v in self.items()) 
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index 6d5c32cae7368..613f7147a4a7d 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -421,3 +421,31 @@ def test_to_dict_returns_native_types(self, orient, data, expected_types): for i, key, value in assertion_iterator: assert value == data[key][i] assert type(value) is expected_types[key][i] + + @pytest.mark.parametrize("orient", ["dict", "list", "series", "records", "index"]) + def test_to_dict_index_false_error(self, orient): + # GH#46398 + df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=["row1", "row2"]) + msg = "'index=False' is only valid when 'orient' is 'split' or 'tight'" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient=orient, index=False) + + @pytest.mark.parametrize( + "orient, expected", + [ + ("split", {"columns": ["col1", "col2"], "data": [[1, 3], [2, 4]]}), + ( + "tight", + { + "columns": ["col1", "col2"], + "data": [[1, 3], [2, 4]], + "column_names": [None], + }, + ), + ], + ) + def test_to_dict_index_false(self, orient, expected): + # GH#46398 + df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=["row1", "row2"]) + result = df.to_dict(orient=orient, index=False) + tm.assert_dict_equal(result, expected) From 479daf30e7558f737b7915520b50f60b3f4786c4 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Wed, 7 Sep 2022 13:30:30 +0800 Subject: [PATCH 2/4] Add whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c479c59082464..5d57ead70e9eb 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -331,6 +331,7 @@ Other enhancements - Added ``copy`` keyword to :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` to allow user to set axis on a new object without necessarily copying the underlying data (:issue:`47932`) - 
:meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) +- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From ca688213d8df6f785a2e0e600701e9b1fe67f0d1 Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 10 Sep 2022 20:07:20 +0800 Subject: [PATCH 3/4] Move whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 - doc/source/whatsnew/v1.6.0.rst | 1 + pandas/core/frame.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index f766884c79f0a..d8a319da2065e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -333,7 +333,6 @@ Other enhancements - :meth:`DataFrame.set_index` now supports a ``copy`` keyword. If ``False``, the underlying data is not copied when a new :class:`DataFrame` is returned (:issue:`48043`) - The method :meth:`.ExtensionArray.factorize` accepts ``use_na_sentinel=False`` for determining how null values are to be treated (:issue:`46601`) - The ``Dockerfile`` now installs a dedicated ``pandas-dev`` virtual environment for pandas development instead of using the ``base`` environment (:issue:`48427`) -- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.notable_bug_fixes: diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index ee5085fd9ad89..0fa9838686acd 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -29,6 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) +- Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d86970df04ca7..1cf99cf549794 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1906,7 +1906,7 @@ def to_dict( is 'tight') in the returned dictionary. Can only be ``False`` when `orient` is 'split' or 'tight'. - .. versionadded:: 1.5.0 + .. 
versionadded:: 1.6.0 Returns ------- From e69e0d5fb401f7073819e4ec3e7838982ca16ebe Mon Sep 17 00:00:00 2001 From: yuanx749 Date: Sat, 10 Sep 2022 22:16:09 +0800 Subject: [PATCH 4/4] Refactor duplicate code --- pandas/core/frame.py | 77 ++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 53 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1cf99cf549794..c203893915d75 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2027,65 +2027,36 @@ def to_dict( ) elif orient == "split": - if index: - return into_c( - ( - ("index", self.index.tolist()), - ("columns", self.columns.tolist()), - ( - "data", - [ - list(map(maybe_box_native, t)) - for t in self.itertuples(index=False, name=None) - ], - ), - ) - ) - else: - return into_c( + return into_c( + ((("index", self.index.tolist()),) if index else ()) + + ( + ("columns", self.columns.tolist()), ( - ("columns", self.columns.tolist()), - ( - "data", - [ - list(map(maybe_box_native, t)) - for t in self.itertuples(index=False, name=None) - ], - ), - ) + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), ) + ) elif orient == "tight": - if index: - return into_c( - ( - ("index", self.index.tolist()), - ("columns", self.columns.tolist()), - ( - "data", - [ - list(map(maybe_box_native, t)) - for t in self.itertuples(index=False, name=None) - ], - ), - ("index_names", list(self.index.names)), - ("column_names", list(self.columns.names)), - ) - ) - else: - return into_c( + return into_c( + ((("index", self.index.tolist()),) if index else ()) + + ( + ("columns", self.columns.tolist()), ( - ("columns", self.columns.tolist()), - ( - "data", - [ - list(map(maybe_box_native, t)) - for t in self.itertuples(index=False, name=None) - ], - ), - ("column_names", list(self.columns.names)), - ) + "data", + [ + list(map(maybe_box_native, t)) + for t in self.itertuples(index=False, name=None) + ], + ), ) + + ((("index_names", 
list(self.index.names)),) if index else ()) + + (("column_names", list(self.columns.names)),) + ) elif orient == "series": return into_c((k, v) for k, v in self.items())