Skip to content

Commit dde77c3

Browse files
authored
Merge pull request #1 from JeanRoca/issue-27117
Issue 27117
2 parents 9f93d57 + ec927c3 commit dde77c3

File tree

3 files changed

+39
-6
lines changed

3 files changed

+39
-6
lines changed

pandas/core/frame.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,9 +2156,10 @@ def to_parquet(
21562156
21572157
.. versionadded:: 0.24.0
21582158
2159-
partition_cols : list, optional, default None
2160-
Column names by which to partition the dataset
2161-
Columns are partitioned in the order they are given
2159+
partition_cols : list or string, optional, default None
2160+
Column names by which to partition the dataset.
2161+
Columns are partitioned in the order they are given.
2162+
String identifies a single column to be partitioned.
21622163
21632164
.. versionadded:: 0.24.0
21642165

pandas/io/parquet.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,15 +235,18 @@ def to_parquet(
235235
236236
.. versionadded:: 0.24.0
237237
238-
partition_cols : list, optional, default None
239-
Column names by which to partition the dataset
240-
Columns are partitioned in the order they are given
238+
partition_cols : list or string, optional, default None
239+
Column names by which to partition the dataset.
240+
Columns are partitioned in the order they are given.
241+
String identifies a single column to be partitioned.
241242
242243
.. versionadded:: 0.24.0
243244
244245
kwargs
245246
Additional keyword arguments passed to the engine
246247
"""
248+
if isinstance(partition_cols, str):
249+
partition_cols = [partition_cols]
247250
impl = get_engine(engine)
248251
return impl.write(
249252
df,

pandas/tests/io/test_parquet.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,18 @@ def test_partition_cols_supported(self, pa, df_full):
473473
assert len(dataset.partitions.partition_names) == 2
474474
assert dataset.partitions.partition_names == set(partition_cols)
475475

476+
def test_partition_cols_string(self, pa, df_full):
477+
# GH #23283
478+
partition_cols = 'bool'
479+
df = df_full
480+
with tm.ensure_clean_dir() as path:
481+
df.to_parquet(path, partition_cols=partition_cols, compression=None)
482+
import pyarrow.parquet as pq
483+
484+
dataset = pq.ParquetDataset(path, validate_schema=False)
485+
assert len(dataset.partitions.partition_names) == 1
486+
assert dataset.partitions.partition_names == set([partition_cols])
487+
476488
def test_empty_dataframe(self, pa):
477489
# GH #27339
478490
df = pd.DataFrame()
@@ -543,6 +555,23 @@ def test_partition_cols_supported(self, fp, df_full):
543555
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
544556
assert len(actual_partition_cols) == 2
545557

558+
def test_partition_cols_string(self, fp, df_full):
559+
# GH #23283
560+
partition_cols = 'bool'
561+
df = df_full
562+
with tm.ensure_clean_dir() as path:
563+
df.to_parquet(
564+
path,
565+
engine="fastparquet",
566+
partition_cols=partition_cols,
567+
compression=None,
568+
)
569+
assert os.path.exists(path)
570+
import fastparquet # noqa: F811
571+
572+
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
573+
assert len(actual_partition_cols) == 1
574+
546575
def test_partition_on_supported(self, fp, df_full):
547576
# GH #23283
548577
partition_cols = ["bool", "int"]

0 commit comments

Comments (0)