Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ I/O

- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`)
- Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`)
- Added support for passing a string as the ``partition_cols`` parameter in :func:`pandas.to_parquet` (:issue:`27117`)
-

Plotting
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2156,9 +2156,10 @@ def to_parquet(

.. versionadded:: 0.24.0

partition_cols : list, optional, default None
Column names by which to partition the dataset
Columns are partitioned in the order they are given
partition_cols : list or str, optional, default None
Column names by which to partition the dataset.
Columns are partitioned in the order they are given.
If a string is passed, it is treated as a single column to partition by.

.. versionadded:: 0.24.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a .. versionchanged:: 1.0.0 explaining that passing a single string was added in 1.0?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I added it correctly. This is my first contribution so bear with me please :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can put the ``.. versionchanged:: ...`` right below the parameter explanation (so since there is already a ``.. versionadded:: 0.24.0``, it would go just below that).


Expand Down
9 changes: 6 additions & 3 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,15 +235,18 @@ def to_parquet(

.. versionadded:: 0.24.0

partition_cols : list, optional, default None
Column names by which to partition the dataset
Columns are partitioned in the order they are given
partition_cols : list or str, optional, default None
Column names by which to partition the dataset.
Columns are partitioned in the order they are given.
If a string is passed, it is treated as a single column to partition by.

.. versionadded:: 0.24.0

kwargs
Additional keyword arguments passed to the engine
"""
if isinstance(partition_cols, str):
partition_cols = [partition_cols]
impl = get_engine(engine)
return impl.write(
df,
Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,19 @@ def test_partition_cols_supported(self, pa, df_full):
assert len(dataset.partitions.partition_names) == 2
assert dataset.partitions.partition_names == set(partition_cols)

def test_partition_cols_string(self, pa, df_full):
    # GH #23283 — a bare string should be accepted as partition_cols
    # and behave like a one-element list.
    partition_col = "bool"
    expected_names = {partition_col}
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(path, partition_cols=partition_col, compression=None)
        import pyarrow.parquet as pq

        # Exactly one hive-style partition level must have been written.
        dataset = pq.ParquetDataset(path, validate_schema=False)
        assert len(dataset.partitions.partition_names) == 1
        assert dataset.partitions.partition_names == expected_names

def test_empty_dataframe(self, pa):
# GH #27339
df = pd.DataFrame()
Expand Down Expand Up @@ -543,6 +556,23 @@ def test_partition_cols_supported(self, fp, df_full):
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
assert len(actual_partition_cols) == 2

def test_partition_cols_string(self, fp, df_full):
    # GH #23283 — a bare string should be accepted as partition_cols
    # and behave like a one-element list.
    single_col = "bool"
    df = df_full
    with tm.ensure_clean_dir() as path:
        df.to_parquet(
            path,
            engine="fastparquet",
            partition_cols=single_col,
            compression=None,
        )
        assert os.path.exists(path)
        import fastparquet  # noqa: F811

        # Fastparquet records partition columns in .cats; exactly one expected.
        written_partitions = fastparquet.ParquetFile(path, False).cats
        assert len(written_partitions) == 1

def test_partition_on_supported(self, fp, df_full):
# GH #23283
partition_cols = ["bool", "int"]
Expand Down