
Commit cbd831e

feat(bigquery): expose date_as_object parameter to users (googleapis#150)
* feat(bigquery): expose date_as_object parameter for users
* feat(bigquery): nit
* feat(bigquery): add unit test for date as object without pyarrow
* feat(bigquery): docs fixed
* feat(bigquery): nit
* feat(bigquery): nit
1 parent bc33a67 commit cbd831e

3 files changed: +96 additions, -8 deletions

google/cloud/bigquery/job.py

Lines changed: 11 additions & 3 deletions
@@ -3320,6 +3320,7 @@ def to_dataframe(
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
+        date_as_object=True,
    ):
        """Return a pandas DataFrame from a QueryJob

@@ -3350,16 +3351,22 @@ def to_dataframe(
                for details.

                ..versionadded:: 1.11.0
-            create_bqstorage_client (bool):
-                Optional. If ``True`` (default), create a BigQuery Storage API
-                client using the default API settings. The BigQuery Storage API
+            create_bqstorage_client (Optional[bool]):
+                If ``True`` (default), create a BigQuery Storage API client
+                using the default API settings. The BigQuery Storage API
                is a faster way to fetch rows from BigQuery. See the
                ``bqstorage_client`` parameter for more information.

                This argument does nothing if ``bqstorage_client`` is supplied.

                ..versionadded:: 1.24.0

+            date_as_object (Optional[bool]):
+                If ``True`` (default), cast dates to objects. If ``False``, convert
+                to datetime64[ns] dtype.
+
+                ..versionadded:: 1.26.0
+
        Returns:
            A :class:`~pandas.DataFrame` populated with row data and column
            headers from the query results. The column headers are derived
@@ -3373,6 +3380,7 @@ def to_dataframe(
            dtypes=dtypes,
            progress_bar_type=progress_bar_type,
            create_bqstorage_client=create_bqstorage_client,
+            date_as_object=date_as_object,
        )

    def __iter__(self):
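For context, a minimal caller-side sketch of the keyword exposed above, assuming google-cloud-bigquery 1.26.0+ with pandas installed; the query text and the create_bqstorage_client=False flag are illustrative only and are not part of this change:

# Hypothetical usage sketch; only the date_as_object keyword comes from this commit.
from google.cloud import bigquery

client = bigquery.Client()
job = client.query("SELECT DATE '1999-12-01' AS d")

# Default behavior: DATE columns come back as Python date objects (dtype "object").
df_objects = job.to_dataframe(create_bqstorage_client=False)
print(df_objects["d"].dtype)  # object

# New opt-out: DATE columns are converted to pandas-native datetime64[ns].
df_datetimes = job.to_dataframe(date_as_object=False, create_bqstorage_client=False)
print(df_datetimes["d"].dtype)  # datetime64[ns]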

google/cloud/bigquery/table.py

Lines changed: 13 additions & 4 deletions
@@ -1633,6 +1633,7 @@ def to_dataframe(
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
+        date_as_object=True,
    ):
        """Create a pandas DataFrame by loading all pages of a query.

@@ -1673,16 +1674,22 @@ def to_dataframe(
                progress bar as a graphical dialog box.

                ..versionadded:: 1.11.0
-            create_bqstorage_client (bool):
-                Optional. If ``True`` (default), create a BigQuery Storage API
-                client using the default API settings. The BigQuery Storage API
+            create_bqstorage_client (Optional[bool]):
+                If ``True`` (default), create a BigQuery Storage API client
+                using the default API settings. The BigQuery Storage API
                is a faster way to fetch rows from BigQuery. See the
                ``bqstorage_client`` parameter for more information.

                This argument does nothing if ``bqstorage_client`` is supplied.

                ..versionadded:: 1.24.0

+            date_as_object (Optional[bool]):
+                If ``True`` (default), cast dates to objects. If ``False``, convert
+                to datetime64[ns] dtype.
+
+                ..versionadded:: 1.26.0
+
        Returns:
            pandas.DataFrame:
                A :class:`~pandas.DataFrame` populated with row data and column
@@ -1722,7 +1729,7 @@ def to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=create_bqstorage_client,
            )
-            df = record_batch.to_pandas()
+            df = record_batch.to_pandas(date_as_object=date_as_object)
            for column in dtypes:
                df[column] = pandas.Series(df[column], dtype=dtypes[column])
            return df
@@ -1799,6 +1806,7 @@ def to_dataframe(
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
+        date_as_object=True,
    ):
        """Create an empty dataframe.

@@ -1807,6 +1815,7 @@ def to_dataframe(
            dtypes (Any): Ignored. Added for compatibility with RowIterator.
            progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+            date_as_object (bool): Ignored. Added for compatibility with RowIterator.

        Returns:
            pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
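The forwarded keyword is pyarrow's own to_pandas() option; a standalone sketch of the dtype difference it controls, using made-up sample data (requires pyarrow and pandas installed) rather than anything from this repository:

import datetime

import pyarrow as pa

# One-row Arrow table with a DATE column, mirroring BigQuery's DATE type.
arrow_table = pa.table(
    {"date": pa.array([datetime.date(1999, 12, 1)], type=pa.date32())}
)

# date_as_object=True (pyarrow's default) keeps datetime.date values, i.e. object dtype.
print(arrow_table.to_pandas(date_as_object=True)["date"].dtype)   # object

# date_as_object=False converts the column to pandas-native datetime64[ns].
print(arrow_table.to_pandas(date_as_object=False)["date"].dtype)  # datetime64[ns]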

tests/unit/test_job.py

Lines changed: 72 additions & 1 deletion
@@ -5504,7 +5504,15 @@ def test_to_dataframe_column_dtypes(self):
            },
        }
        row_data = [
-            ["1.4338368E9", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"],
+            [
+                "1.4338368E9",
+                "420",
+                "1.1",
+                "1.77",
+                "Cto_dataframeash",
+                "true",
+                "1999-12-01",
+            ],
            ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"],
            ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"],
        ]
@@ -5533,6 +5541,69 @@ def test_to_dataframe_column_dtypes(self):
        self.assertEqual(df.complete.dtype.name, "bool")
        self.assertEqual(df.date.dtype.name, "object")

+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_to_dataframe_column_date_dtypes(self):
+        begun_resource = self._make_resource()
+        query_resource = {
+            "jobComplete": True,
+            "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
+            "totalRows": "1",
+            "schema": {"fields": [{"name": "date", "type": "DATE"}]},
+        }
+        row_data = [
+            ["1999-12-01"],
+        ]
+        rows = [{"f": [{"v": field} for field in row]} for row in row_data]
+        query_resource["rows"] = rows
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource["status"] = {"state": "DONE"}
+        connection = _make_connection(
+            begun_resource, query_resource, done_resource, query_resource
+        )
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+        df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False)
+
+        self.assertIsInstance(df, pandas.DataFrame)
+        self.assertEqual(len(df), 1)  # verify the number of rows
+        exp_columns = [field["name"] for field in query_resource["schema"]["fields"]]
+        self.assertEqual(list(df), exp_columns)  # verify the column names
+
+        self.assertEqual(df.date.dtype.name, "datetime64[ns]")
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_to_dataframe_column_date_dtypes_wo_pyarrow(self):
+        begun_resource = self._make_resource()
+        query_resource = {
+            "jobComplete": True,
+            "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
+            "totalRows": "1",
+            "schema": {"fields": [{"name": "date", "type": "DATE"}]},
+        }
+        row_data = [
+            ["1999-12-01"],
+        ]
+        rows = [{"f": [{"v": field} for field in row]} for row in row_data]
+        query_resource["rows"] = rows
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource["status"] = {"state": "DONE"}
+        connection = _make_connection(
+            begun_resource, query_resource, done_resource, query_resource
+        )
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+
+        with mock.patch("google.cloud.bigquery.table.pyarrow", None):
+            df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False)
+
+        self.assertIsInstance(df, pandas.DataFrame)
+        self.assertEqual(len(df), 1)  # verify the number of rows
+        exp_columns = [field["name"] for field in query_resource["schema"]["fields"]]
+        self.assertEqual(list(df), exp_columns)  # verify the column names
+
+        self.assertEqual(df.date.dtype.name, "object")
+
    @unittest.skipIf(pandas is None, "Requires `pandas`")
    @unittest.skipIf(tqdm is None, "Requires `tqdm`")
    @mock.patch("tqdm.tqdm")
