
Commit 7345159

ENH: Add table_schema parameter for user-defined BigQuery schema (#46)

1 parent 9eb9d77

3 files changed (+56, -3 lines)

docs/source/changelog.rst

Lines changed: 2 additions & 1 deletion
@@ -1,12 +1,13 @@
 Changelog
 =========

-0.2.1 / 2017-??-??
+0.3.0 / 2017-??-??
 ------------------

 - :func:`read_gbq` now raises ``QueryTimeout`` if the request exceeds the ``query.timeoutMs`` value specified in the BigQuery configuration. (:issue:`76`)
 - Environment variable ``PANDAS_GBQ_CREDENTIALS_FILE`` can now be used to override the default location where the BigQuery user account credentials are stored. (:issue:`86`)
 - BigQuery user account credentials are now stored in an application-specific hidden user folder on the operating system. (:issue:`41`)
+- Add support for a user-provided schema in :func:`to_gbq` instead of inferring the schema from the passed ``DataFrame`` with ``DataFrame.dtypes``. (:issue:`46`)

 0.2.0 / 2017-07-24
 ------------------
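
As a quick illustration of the new changelog entry, a minimal sketch of the call pattern (the project id, table name, and DataFrame are placeholders, not part of the commit):

    import pandas as pd
    from pandas_gbq import gbq

    # Placeholder data and names, for illustration only.
    df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1.0, 2.0]})
    gbq.to_gbq(df, 'my_dataset.my_table', 'my-project',
               table_schema=[{'name': 'col1', 'type': 'STRING'},
                             {'name': 'col2', 'type': 'FLOAT'}])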

pandas_gbq/gbq.py

Lines changed: 12 additions & 2 deletions
@@ -1017,7 +1017,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,

 def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
            verbose=True, reauth=False, if_exists='fail', private_key=None,
-           auth_local_webserver=False):
+           auth_local_webserver=False, table_schema=None):
     """Write a DataFrame to a Google BigQuery table.

     The main method a user calls to export pandas DataFrame contents to
@@ -1075,6 +1075,13 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
     .. [console flow]
        http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
     .. versionadded:: 0.2.0
+    table_schema : list of dicts
+        List of BigQuery table fields to which the DataFrame columns
+        conform, e.g. ``[{'name': 'col1', 'type': 'STRING'},...]``. If a
+        schema is not provided, it will be generated according to the
+        dtypes of the DataFrame columns. See the BigQuery API
+        documentation for the available field names and types.
+        .. versionadded:: 0.3.0
     """

     _test_google_api_imports()
@@ -1094,7 +1101,10 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
     table = _Table(project_id, dataset_id, reauth=reauth,
                    private_key=private_key)

-    table_schema = _generate_bq_schema(dataframe)
+    if not table_schema:
+        table_schema = _generate_bq_schema(dataframe)
+    else:
+        table_schema = dict(fields=table_schema)

     # If table exists, check if_exists parameter
     if table.exists(table_id):
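
To make the new branching concrete: a user-supplied schema arrives as a bare list of field dicts and is wrapped into the ``{'fields': [...]}`` shape the BigQuery REST API expects, while the fallback still infers types from dtypes. A standalone sketch (the dtype-to-type mapping here is an assumption for illustration, not a quote of ``_generate_bq_schema``):

    import pandas as pd

    def resolve_schema(dataframe, table_schema=None):
        # Mirrors the branching added above, outside the library.
        if not table_schema:
            # Assumed dtype -> BigQuery type mapping, illustration only.
            type_map = {'i': 'INTEGER', 'b': 'BOOLEAN', 'f': 'FLOAT',
                        'M': 'TIMESTAMP', 'O': 'STRING'}
            table_schema = [{'name': name,
                             'type': type_map.get(dtype.kind, 'STRING')}
                            for name, dtype in dataframe.dtypes.items()]
        # Wrap the bare field list into the REST API schema shape.
        return dict(fields=table_schema)

    df = pd.DataFrame({'x': [1, 2], 'y': ['a', 'b']})
    print(resolve_schema(df))
    # {'fields': [{'name': 'x', 'type': 'INTEGER'},
    #             {'name': 'y', 'type': 'STRING'}]}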

pandas_gbq/tests/test_gbq.py

Lines changed: 42 additions & 0 deletions
@@ -1312,6 +1312,48 @@ def test_schema_is_subset_fails_if_not_subset(self):
         assert self.sut.schema_is_subset(
             dataset, table_name, tested_schema) is False

+    def test_upload_data_with_valid_user_schema(self):
+        # Issue #46; tests scenarios with user-provided
+        # schemas
+        df = tm.makeMixedDataFrame()
+        test_id = "15"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'STRING'},
+                       {'name': 'D', 'type': 'TIMESTAMP'}]
+        destination_table = self.destination_table + test_id
+        gbq.to_gbq(df, destination_table, _get_project_id(),
+                   private_key=_get_private_key_path(),
+                   table_schema=test_schema)
+        dataset, table = destination_table.split('.')
+        assert self.table.verify_schema(dataset, table,
+                                        dict(fields=test_schema))
+
+    def test_upload_data_with_invalid_user_schema_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "16"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'},
+                       {'name': 'D', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(),
+                       table_schema=test_schema)
+
+    def test_upload_data_with_missing_schema_fields_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "17"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(),
+                       table_schema=test_schema)
+
     def test_list_dataset(self):
         dataset_id = self.dataset_prefix + "1"
         assert dataset_id in self.dataset.datasets()
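
For context on the two failing tests: ``tm.makeMixedDataFrame()`` produces float, string, and datetime columns, so declaring every field as FLOAT (or omitting column D entirely) makes the streamed rows mismatch the table schema, and the failure surfaces as ``gbq.StreamingInsertError``. A quick look at the helper's output (the dtypes shown are my understanding of the pandas test helper, stated as an assumption):

    import pandas.util.testing as tm

    df = tm.makeMixedDataFrame()
    print(df.dtypes)
    # Expected (assumed) output:
    # A           float64   -> FLOAT is valid
    # B           float64   -> FLOAT is valid
    # C            object   -> strings; a FLOAT field rejects 'foo1'
    # D    datetime64[ns]   -> timestamps; a FLOAT field rejects them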
