From 991b8cbbcb8fe6399a589dbe920c1aa6f906156b Mon Sep 17 00:00:00 2001 From: Galuh Sahid Date: Thu, 29 Aug 2019 19:19:47 +0700 Subject: [PATCH 1/5] CLN: Update index parameter in pandas to_parquet --- pandas/core/frame.py | 7 +++---- pandas/io/parquet.py | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3d1a39a86c784..1a5ffd7c8a130 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2119,7 +2119,7 @@ def to_parquet( fname, engine="auto", compression="snappy", - index=None, + index=True, partition_cols=None, **kwargs ): @@ -2148,10 +2148,9 @@ def to_parquet( 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. - index : bool, default None + index : bool, default True If ``True``, include the dataframe's index(es) in the file output. - If ``False``, they will not be written to the file. If ``None``, - the behavior depends on the chosen engine. + If ``False``, they will not be written to the file. .. versionadded:: 0.24.0 diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6fc70e9f4a737..a5a48443e300a 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -206,7 +206,7 @@ def to_parquet( path, engine="auto", compression="snappy", - index=None, + index=True, partition_cols=None, **kwargs ): @@ -228,10 +228,9 @@ def to_parquet( 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. - index : bool, default None + index : bool, default True If ``True``, include the dataframe's index(es) in the file output. If - ``False``, they will not be written to the file. If ``None``, the - engine's default behavior will be used. + ``False``, they will not be written to the file. .. versionadded:: 0.24.0 From 1e7c8a84383d1f4b74e763373f28ec669ca6cd13 Mon Sep 17 00:00:00 2001 From: Galuh Sahid Date: Fri, 13 Sep 2019 15:39:49 +0700 Subject: [PATCH 2/5] Restore default parameter --- pandas/core/frame.py | 2 +- pandas/io/parquet.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1a5ffd7c8a130..d792386164545 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2119,7 +2119,7 @@ def to_parquet( fname, engine="auto", compression="snappy", - index=True, + index=None, partition_cols=None, **kwargs ): diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index a5a48443e300a..ce2eb32876f2c 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -206,7 +206,7 @@ def to_parquet( path, engine="auto", compression="snappy", - index=True, + index=None, partition_cols=None, **kwargs ): From 50cb5f08bdb3c4b563a3db5bb3996cc84ca83b2d Mon Sep 17 00:00:00 2001 From: Galuh Sahid Date: Fri, 13 Sep 2019 15:50:41 +0700 Subject: [PATCH 3/5] DOC: Update definition of index=None in pandas to_parquet --- pandas/core/frame.py | 1 + pandas/io/parquet.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d792386164545..e6368f182a03d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2151,6 +2151,7 @@ def to_parquet( index : bool, default True If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. + If ``None``, RangeIndex will be stored as metadata-only. .. versionadded:: 0.24.0 diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index ce2eb32876f2c..b7df88c309e26 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -230,7 +230,8 @@ def to_parquet( Name of the compression to use. Use ``None`` for no compression. index : bool, default True If ``True``, include the dataframe's index(es) in the file output. If - ``False``, they will not be written to the file. + ``False``, they will not be written to the file. + If ``None``, RangeIndex will be stored as metadata-only. .. versionadded:: 0.24.0 From 66f119e054e2719e412426c062560d76900ebb55 Mon Sep 17 00:00:00 2001 From: Galuh Sahid Date: Fri, 13 Sep 2019 15:52:26 +0700 Subject: [PATCH 4/5] Remove trailing whitespace --- pandas/io/parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index b7df88c309e26..f57c4aefd7917 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -230,7 +230,7 @@ def to_parquet( Name of the compression to use. Use ``None`` for no compression. index : bool, default True If ``True``, include the dataframe's index(es) in the file output. If - ``False``, they will not be written to the file. + ``False``, they will not be written to the file. If ``None``, RangeIndex will be stored as metadata-only. .. versionadded:: 0.24.0 From e0e9fbf05a2d9530a7563b24870d939cd7234d5e Mon Sep 17 00:00:00 2001 From: Galuh Sahid Date: Sun, 15 Sep 2019 22:33:00 +0700 Subject: [PATCH 5/5] DOC: Update default index in docstrings and expand explanation --- pandas/core/frame.py | 8 ++++++-- pandas/io/parquet.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e6368f182a03d..75e57c2a4447f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2148,10 +2148,14 @@ def to_parquet( 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. - index : bool, default True + index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. - If ``None``, RangeIndex will be stored as metadata-only. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. .. versionadded:: 0.24.0 diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index f57c4aefd7917..407305da30d11 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -228,10 +228,14 @@ def to_parquet( 'pyarrow' is unavailable. compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' Name of the compression to use. Use ``None`` for no compression. - index : bool, default True + index : bool, default None If ``True``, include the dataframe's index(es) in the file output. If ``False``, they will not be written to the file. - If ``None``, RangeIndex will be stored as metadata-only. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. .. versionadded:: 0.24.0