Skip to content

Commit dde77c3

Browse files
authored
Merge pull request #1 from JeanRoca/issue-27117
Issue 27117
2 parents 9f93d57 + ec927c3 commit dde77c3

File tree

3 files changed

+39
-6
lines changed

3 files changed

+39
-6
lines changed

pandas/core/frame.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,9 +2156,10 @@ def to_parquet(
21562156
21572157
.. versionadded:: 0.24.0
21582158
2159-
partition_cols : list, optional, default None
2160-
Column names by which to partition the dataset
2161-
Columns are partitioned in the order they are given
2159+
partition_cols : list or string, optional, default None
2160+
Column names by which to partition the dataset.
2161+
Columns are partitioned in the order they are given.
2162+
String identifies a single column to be partitioned.
21622163
21632164
.. versionadded:: 0.24.0
21642165

pandas/io/parquet.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,15 +235,18 @@ def to_parquet(
235235
236236
.. versionadded:: 0.24.0
237237
238-
partition_cols : list, optional, default None
239-
Column names by which to partition the dataset
240-
Columns are partitioned in the order they are given
238+
partition_cols : list or string, optional, default None
239+
Column names by which to partition the dataset.
240+
Columns are partitioned in the order they are given.
241+
String identifies a single column to be partitioned.
241242
242243
.. versionadded:: 0.24.0
243244
244245
kwargs
245246
Additional keyword arguments passed to the engine
246247
"""
248+
if isinstance(partition_cols, str):
249+
partition_cols = [partition_cols]
247250
impl = get_engine(engine)
248251
return impl.write(
249252
df,

pandas/tests/io/test_parquet.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,18 @@ def test_partition_cols_supported(self, pa, df_full):
473473
assert len(dataset.partitions.partition_names) == 2
474474
assert dataset.partitions.partition_names == set(partition_cols)
475475

476+
def test_partition_cols_string(self, pa, df_full):
477+
# GH #23283
478+
partition_cols = 'bool'
479+
df = df_full
480+
with tm.ensure_clean_dir() as path:
481+
df.to_parquet(path, partition_cols=partition_cols, compression=None)
482+
import pyarrow.parquet as pq
483+
484+
dataset = pq.ParquetDataset(path, validate_schema=False)
485+
assert len(dataset.partitions.partition_names) == 1
486+
assert dataset.partitions.partition_names == set([partition_cols])
487+
476488
def test_empty_dataframe(self, pa):
477489
# GH #27339
478490
df = pd.DataFrame()
@@ -543,6 +555,23 @@ def test_partition_cols_supported(self, fp, df_full):
543555
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
544556
assert len(actual_partition_cols) == 2
545557

558+
def test_partition_cols_string(self, fp, df_full):
559+
# GH #23283
560+
partition_cols = 'bool'
561+
df = df_full
562+
with tm.ensure_clean_dir() as path:
563+
df.to_parquet(
564+
path,
565+
engine="fastparquet",
566+
partition_cols=partition_cols,
567+
compression=None,
568+
)
569+
assert os.path.exists(path)
570+
import fastparquet # noqa: F811
571+
572+
actual_partition_cols = fastparquet.ParquetFile(path, False).cats
573+
assert len(actual_partition_cols) == 1
574+
546575
def test_partition_on_supported(self, fp, df_full):
547576
# GH #23283
548577
partition_cols = ["bool", "int"]

0 commit comments

Comments (0)