Skip to content

Commit 97c9aaa

Browse files
vreyespue and tswast
authored
ENH: add project id to destination table in to_gbq() (#347)
ENH: add project id to destination table in to_gbq() (#347)

* ENH: add project id to destination table in to_gbq()
* ENH: fix non-callable client error when adding project id to destination table
* Update pandas_gbq/gbq.py (pass table reference)

  Co-authored-by: Tim Swast <[email protected]>
* Update pandas_gbq/load.py (pass destination table)

  Co-authored-by: Tim Swast <[email protected]>
* Update pandas_gbq/load.py (delete unnecessary variable)

  Co-authored-by: Tim Swast <[email protected]>
* Update pandas_gbq/gbq.py (pass destination_table_ref)

  Co-authored-by: Tim Swast <[email protected]>
* Fix call to load.load_chunks (now using only destination_table_ref)
* add assertions for project ID to unit test
* add to changelog
* use project from credentials if none provided

Co-authored-by: Tim Swast <[email protected]>
1 parent ac2d2fe commit 97c9aaa

File tree

4 files changed

+87
-18
lines changed

4 files changed

+87
-18
lines changed

docs/source/changelog.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
Changelog
22
=========
33

4+
.. _changelog-0.15.0:
5+
6+
0.15.0 / TBD
7+
------------
8+
9+
Features
10+
~~~~~~~~
11+
12+
- Load DataFrame with ``to_gbq`` to a table in a project different from the API
13+
client project. Specify the target table ID as ``project.dataset.table`` to
14+
use this feature. (:issue:`321`, :issue:`347`)
15+
16+
417
.. _changelog-0.14.1:
518

619
0.14.1 / 2020-11-10

pandas_gbq/gbq.py

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -604,8 +604,7 @@ def _download_results(
604604
def load_data(
605605
self,
606606
dataframe,
607-
dataset_id,
608-
table_id,
607+
destination_table_ref,
609608
chunksize=None,
610609
schema=None,
611610
progress_bar=True,
@@ -618,8 +617,7 @@ def load_data(
618617
chunks = load.load_chunks(
619618
self.client,
620619
dataframe,
621-
dataset_id,
622-
table_id,
620+
destination_table_ref,
623621
chunksize=chunksize,
624622
schema=schema,
625623
location=self.location,
@@ -1037,7 +1035,8 @@ def to_gbq(
10371035
dataframe : pandas.DataFrame
10381036
DataFrame to be written to a Google BigQuery table.
10391037
destination_table : str
1040-
Name of table to be written, in the form ``dataset.tablename``.
1038+
Name of table to be written, in the form ``dataset.tablename`` or
1039+
``project.dataset.tablename``.
10411040
project_id : str, optional
10421041
Google BigQuery Account project ID. Optional when available from
10431042
the environment.
@@ -1133,7 +1132,8 @@ def to_gbq(
11331132

11341133
if "." not in destination_table:
11351134
raise NotFoundException(
1136-
"Invalid Table Name. Should be of the form 'datasetId.tableId' "
1135+
"Invalid Table Name. Should be of the form 'datasetId.tableId' or "
1136+
"'projectId.datasetId.tableId'"
11371137
)
11381138

11391139
connector = GbqConnector(
@@ -1145,7 +1145,14 @@ def to_gbq(
11451145
private_key=private_key,
11461146
)
11471147
bqclient = connector.client
1148-
dataset_id, table_id = destination_table.rsplit(".", 1)
1148+
1149+
destination_table_ref = bigquery.table.TableReference.from_string(
1150+
destination_table, default_project=connector.project_id
1151+
)
1152+
1153+
project_id_table = destination_table_ref.project
1154+
dataset_id = destination_table_ref.dataset_id
1155+
table_id = destination_table_ref.table_id
11491156

11501157
default_schema = _generate_bq_schema(dataframe)
11511158
if not table_schema:
@@ -1157,10 +1164,10 @@ def to_gbq(
11571164

11581165
# If table exists, check if_exists parameter
11591166
try:
1160-
table = bqclient.get_table(destination_table)
1167+
table = bqclient.get_table(destination_table_ref)
11611168
except google_exceptions.NotFound:
11621169
table_connector = _Table(
1163-
project_id,
1170+
project_id_table,
11641171
dataset_id,
11651172
location=location,
11661173
credentials=connector.credentials,
@@ -1203,8 +1210,7 @@ def to_gbq(
12031210

12041211
connector.load_data(
12051212
dataframe,
1206-
dataset_id,
1207-
table_id,
1213+
destination_table_ref,
12081214
chunksize=chunksize,
12091215
schema=table_schema,
12101216
progress_bar=progress_bar,
@@ -1279,8 +1285,12 @@ def exists(self, table_id):
12791285
true if table exists, otherwise false
12801286
"""
12811287
from google.api_core.exceptions import NotFound
1288+
from google.cloud.bigquery import DatasetReference
1289+
from google.cloud.bigquery import TableReference
12821290

1283-
table_ref = self.client.dataset(self.dataset_id).table(table_id)
1291+
table_ref = TableReference(
1292+
DatasetReference(self.project_id, self.dataset_id), table_id
1293+
)
12841294
try:
12851295
self.client.get_table(table_ref)
12861296
return True
@@ -1300,12 +1310,14 @@ def create(self, table_id, schema):
13001310
Use the generate_bq_schema to generate your table schema from a
13011311
dataframe.
13021312
"""
1313+
from google.cloud.bigquery import DatasetReference
13031314
from google.cloud.bigquery import SchemaField
13041315
from google.cloud.bigquery import Table
1316+
from google.cloud.bigquery import TableReference
13051317

13061318
if self.exists(table_id):
13071319
raise TableCreationError(
1308-
"Table {0} already " "exists".format(table_id)
1320+
"Table {0} already exists".format(table_id)
13091321
)
13101322

13111323
if not _Dataset(self.project_id, credentials=self.credentials).exists(
@@ -1317,7 +1329,9 @@ def create(self, table_id, schema):
13171329
location=self.location,
13181330
).create(self.dataset_id)
13191331

1320-
table_ref = self.client.dataset(self.dataset_id).table(table_id)
1332+
table_ref = TableReference(
1333+
DatasetReference(self.project_id, self.dataset_id), table_id
1334+
)
13211335
table = Table(table_ref)
13221336

13231337
schema = pandas_gbq.schema.add_default_nullable_mode(schema)

pandas_gbq/load.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,11 @@ def encode_chunks(dataframe, chunksize=None):
5050
def load_chunks(
5151
client,
5252
dataframe,
53-
dataset_id,
54-
table_id,
53+
destination_table_ref,
5554
chunksize=None,
5655
schema=None,
5756
location=None,
5857
):
59-
destination_table = client.dataset(dataset_id).table(table_id)
6058
job_config = bigquery.LoadJobConfig()
6159
job_config.write_disposition = "WRITE_APPEND"
6260
job_config.source_format = "CSV"
@@ -77,7 +75,7 @@ def load_chunks(
7775
yield remaining_rows
7876
client.load_table_from_file(
7977
chunk_buffer,
80-
destination_table,
78+
destination_table_ref,
8179
job_config=job_config,
8280
location=location,
8381
).result()

tests/unit/test_gbq.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,50 @@ def test_to_gbq_w_empty_df(mock_bigquery_client):
257257
mock_bigquery_client.load_table_from_file.assert_not_called()
258258

259259

260+
def test_to_gbq_w_default_project(mock_bigquery_client):
261+
"""If no project is specified, we should be able to use project from
262+
default credentials.
263+
"""
264+
import google.api_core.exceptions
265+
from google.cloud.bigquery.table import TableReference
266+
267+
mock_bigquery_client.get_table.side_effect = (
268+
google.api_core.exceptions.NotFound("my_table")
269+
)
270+
gbq.to_gbq(DataFrame(), "my_dataset.my_table")
271+
272+
mock_bigquery_client.get_table.assert_called_with(
273+
TableReference.from_string("default-project.my_dataset.my_table")
274+
)
275+
mock_bigquery_client.create_table.assert_called_with(mock.ANY)
276+
table = mock_bigquery_client.create_table.call_args[0][0]
277+
assert table.project == "default-project"
278+
279+
280+
def test_to_gbq_w_project_table(mock_bigquery_client):
281+
"""If a project is included in the table ID, use that instead of the client
282+
project. See: https://github.com/pydata/pandas-gbq/issues/321
283+
"""
284+
import google.api_core.exceptions
285+
from google.cloud.bigquery.table import TableReference
286+
287+
mock_bigquery_client.get_table.side_effect = (
288+
google.api_core.exceptions.NotFound("my_table")
289+
)
290+
gbq.to_gbq(
291+
DataFrame(),
292+
"project_table.my_dataset.my_table",
293+
project_id="project_client",
294+
)
295+
296+
mock_bigquery_client.get_table.assert_called_with(
297+
TableReference.from_string("project_table.my_dataset.my_table")
298+
)
299+
mock_bigquery_client.create_table.assert_called_with(mock.ANY)
300+
table = mock_bigquery_client.create_table.call_args[0][0]
301+
assert table.project == "project_table"
302+
303+
260304
def test_to_gbq_creates_dataset(mock_bigquery_client):
261305
import google.api_core.exceptions
262306

0 commit comments

Comments (0)