Skip to content

Commit 296f15a

Browse files
authored
ENH: Add table_schema parameter for user-defined BigQuery schema (#46)
* ENH: Add table_schema parameter for user-defined BigQuery schema (#46) * remove unsupported gbq exception and replace with a generic one * fix versionadded for to_gbq table_schema parameter * fix test id numbering * fix tests by using pytest raise asserts
1 parent 183daf1 commit 296f15a

File tree

3 files changed

+56
-3
lines changed

3 files changed

+56
-3
lines changed

docs/source/changelog.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Changelog
66
------------------
77

88
- Fix an issue where Unicode couldn't be uploaded in Python 2 (:issue:`93`)
9+
- Add support for a passed schema in :func:`to_gbq` instead of inferring the schema from the passed ``DataFrame`` with ``DataFrame.dtypes`` (:issue:`46`)
910

1011

1112
0.3.0 / 2018-01-03

pandas_gbq/gbq.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
890890

891891
def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
892892
verbose=True, reauth=False, if_exists='fail', private_key=None,
893-
auth_local_webserver=False):
893+
auth_local_webserver=False, table_schema=None):
894894
"""Write a DataFrame to a Google BigQuery table.
895895
896896
The main method a user calls to export pandas DataFrame contents to
@@ -948,6 +948,13 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
948948
.. [console flow]
949949
http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console
950950
.. versionadded:: 0.2.0
951+
table_schema : list of dicts
952+
List of BigQuery table fields to which the corresponding DataFrame columns
953+
conform, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If
954+
schema is not provided, it will be generated according to dtypes
955+
of DataFrame columns. See BigQuery API documentation on available
956+
names of a field.
957+
.. versionadded:: 0.3.1
951958
"""
952959

953960
_test_google_api_imports()
@@ -967,7 +974,10 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
967974
table = _Table(project_id, dataset_id, reauth=reauth,
968975
private_key=private_key)
969976

970-
table_schema = _generate_bq_schema(dataframe)
977+
if not table_schema:
978+
table_schema = _generate_bq_schema(dataframe)
979+
else:
980+
table_schema = dict(fields=table_schema)
971981

972982
# If table exists, check if_exists parameter
973983
if table.exists(table_id):

pandas_gbq/tests/test_gbq.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def make_mixed_dataframe_v2(test_size):
174174

175175
def test_generate_bq_schema_deprecated():
176176
# 11121 Deprecation of generate_bq_schema
177-
with tm.assert_produces_warning(FutureWarning):
177+
with pytest.warns(FutureWarning):
178178
df = make_mixed_dataframe_v2(10)
179179
gbq.generate_bq_schema(df)
180180

@@ -1422,6 +1422,48 @@ def test_schema_is_subset_fails_if_not_subset(self):
14221422
assert self.sut.schema_is_subset(
14231423
dataset, table_name, tested_schema) is False
14241424

1425+
def test_upload_data_with_valid_user_schema(self):
1426+
# Issue #46; tests test scenarios with user-provided
1427+
# schemas
1428+
df = tm.makeMixedDataFrame()
1429+
test_id = "18"
1430+
test_schema = [{'name': 'A', 'type': 'FLOAT'},
1431+
{'name': 'B', 'type': 'FLOAT'},
1432+
{'name': 'C', 'type': 'STRING'},
1433+
{'name': 'D', 'type': 'TIMESTAMP'}]
1434+
destination_table = self.destination_table + test_id
1435+
gbq.to_gbq(df, destination_table, _get_project_id(),
1436+
private_key=_get_private_key_path(),
1437+
table_schema=test_schema)
1438+
dataset, table = destination_table.split('.')
1439+
assert self.table.verify_schema(dataset, table,
1440+
dict(fields=test_schema))
1441+
1442+
def test_upload_data_with_invalid_user_schema_raises_error(self):
1443+
df = tm.makeMixedDataFrame()
1444+
test_id = "19"
1445+
test_schema = [{'name': 'A', 'type': 'FLOAT'},
1446+
{'name': 'B', 'type': 'FLOAT'},
1447+
{'name': 'C', 'type': 'FLOAT'},
1448+
{'name': 'D', 'type': 'FLOAT'}]
1449+
destination_table = self.destination_table + test_id
1450+
with pytest.raises(gbq.GenericGBQException):
1451+
gbq.to_gbq(df, destination_table, _get_project_id(),
1452+
private_key=_get_private_key_path(),
1453+
table_schema=test_schema)
1454+
1455+
def test_upload_data_with_missing_schema_fields_raises_error(self):
1456+
df = tm.makeMixedDataFrame()
1457+
test_id = "20"
1458+
test_schema = [{'name': 'A', 'type': 'FLOAT'},
1459+
{'name': 'B', 'type': 'FLOAT'},
1460+
{'name': 'C', 'type': 'FLOAT'}]
1461+
destination_table = self.destination_table + test_id
1462+
with pytest.raises(gbq.GenericGBQException):
1463+
gbq.to_gbq(df, destination_table, _get_project_id(),
1464+
private_key=_get_private_key_path(),
1465+
table_schema=test_schema)
1466+
14251467
def test_list_dataset(self):
14261468
dataset_id = self.dataset_prefix + "1"
14271469
assert dataset_id in self.dataset.datasets()

0 commit comments

Comments
 (0)