From e6925cc68e3a620268eb0e9522853820e8a4bb73 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 12 Sep 2016 10:18:06 -0700 Subject: [PATCH 1/4] Add bigquery create table sample Change-Id: I091422c95a190c4aeca8eef9a2d154a5ad90cd4d --- bigquery/cloud-client/snippets.py | 33 ++++++++++++++++++++++++++ bigquery/cloud-client/snippets_test.py | 15 ++++++++++++ 2 files changed, 48 insertions(+) diff --git a/bigquery/cloud-client/snippets.py b/bigquery/cloud-client/snippets.py index 49272965b41..a9471a8494e 100644 --- a/bigquery/cloud-client/snippets.py +++ b/bigquery/cloud-client/snippets.py @@ -82,6 +82,32 @@ def list_tables(dataset_name, project=None): print(table.name) +def create_table(dataset_name, table_name, project=None): + """Creates a simple table in the given dataset. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + + if not dataset.exists(): + print('Dataset {} does not exist.'.format(dataset_name)) + return + + table = dataset.table(table_name) + + # Set the table schema + table.schema = ( + bigquery.SchemaField('Name', 'STRING'), + bigquery.SchemaField('Age', 'INTEGER'), + bigquery.SchemaField('Weight', 'FLOAT'), + ) + + table.create() + + print('Created table {} in dataset {}.'.format(table_name, dataset_name)) + + def list_rows(dataset_name, table_name, project=None): """Prints rows in the given table. 
@@ -155,6 +181,11 @@ def delete_table(dataset_name, table_name, project=None): 'list-tables', help=list_tables.__doc__) list_tables_parser.add_argument('dataset_name') + create_table_parser = subparsers.add_parser( + 'create-table', help=create_table.__doc__) + create_table_parser.add_argument('dataset_name') + create_table_parser.add_argument('table_name') + list_rows_parser = subparsers.add_parser( 'list-rows', help=list_rows.__doc__) list_rows_parser.add_argument('dataset_name') @@ -171,6 +202,8 @@ def delete_table(dataset_name, table_name, project=None): list_datasets(args.project) elif args.command == 'list-tables': list_tables(args.dataset_name, args.project) + elif args.command == 'create-table': + create_table(args.dataset_name, args.table_name, args.project) elif args.command == 'list-rows': list_rows(args.dataset_name, args.table_name, args.project) elif args.command == 'delete-table': diff --git a/bigquery/cloud-client/snippets_test.py b/bigquery/cloud-client/snippets_test.py index ed2f47639fc..de0aab26ce3 100644 --- a/bigquery/cloud-client/snippets_test.py +++ b/bigquery/cloud-client/snippets_test.py @@ -62,6 +62,21 @@ def test_list_rows(capsys): assert 'Age' in out +def test_create_table(capsys): + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(DATASET_ID) + table = dataset.table('test_create_table') + + if table.exists(): + table.delete() + + try: + snippets.create_table(DATASET_ID, table.name) + assert table.exists() + finally: + table.delete() + + def test_delete_table(capsys): # Create a table to delete bigquery_client = bigquery.Client() From 7dab36158e1146186fc2c778bdf884d4d72e3f78 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 12 Sep 2016 11:01:52 -0700 Subject: [PATCH 2/4] Add copy table sample Change-Id: I4d46078bf7ac514e8a7f82cb654e15748e4ae610 --- bigquery/cloud-client/snippets.py | 55 ++++++++++++++++++++++++++ bigquery/cloud-client/snippets_test.py | 40 ++++++++++++++----- 2 files changed, 85 
insertions(+), 10 deletions(-) diff --git a/bigquery/cloud-client/snippets.py b/bigquery/cloud-client/snippets.py index a9471a8494e..d201a84cba8 100644 --- a/bigquery/cloud-client/snippets.py +++ b/bigquery/cloud-client/snippets.py @@ -25,8 +25,11 @@ """ import argparse +import time +import uuid from gcloud import bigquery +import gcloud.bigquery.job def list_projects(): @@ -152,6 +155,50 @@ def list_rows(dataset_name, table_name, project=None): print(format_string.format(*row)) +def copy_table(dataset_name, table_name, new_table_name, project=None): + """Copies a table. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + # This sample shows the destination table in the same dataset and project, + # however, it's possible to copy across datasets and projects. You can + # also copy multiple source tables into a single destination table by + # providing additional arguments to `copy_table`. + destination_table = dataset.table(new_table_name) + + # Create a job to copy the table to the destination table. + job_id = str(uuid.uuid4()) + job = bigquery_client.copy_table( + job_id, destination_table, table) + + # Create the table if it doesn't exist. + job.create_disposition = ( + gcloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) + + # Start the job. + job.begin() + + # Wait for the job to finish. + print('Waiting for job to finish...') + wait_for_job(job) + + print('Table {} copied to {}.'.format(table_name, new_table_name)) + + +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. + if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + def delete_table(dataset_name, table_name, project=None): """Deletes a table in a given dataset. 
@@ -191,6 +238,12 @@ def delete_table(dataset_name, table_name, project=None): list_rows_parser.add_argument('dataset_name') list_rows_parser.add_argument('table_name') + copy_table_parser = subparsers.add_parser( + 'copy-table', help=copy_table.__doc__) + copy_table_parser.add_argument('dataset_name') + copy_table_parser.add_argument('table_name') + copy_table_parser.add_argument('new_table_name') + delete_table_parser = subparsers.add_parser( 'delete-table', help=delete_table.__doc__) delete_table_parser.add_argument('dataset_name') @@ -206,5 +259,7 @@ def delete_table(dataset_name, table_name, project=None): create_table(args.dataset_name, args.table_name, args.project) elif args.command == 'list-rows': list_rows(args.dataset_name, args.table_name, args.project) + elif args.command == 'copy-table': + copy_table(args.dataset_name, args.table_name, args.new_table_name) elif args.command == 'delete-table': delete_table(args.dataset_name, args.table_name, args.project) diff --git a/bigquery/cloud-client/snippets_test.py b/bigquery/cloud-client/snippets_test.py index de0aab26ce3..441bad63d7d 100644 --- a/bigquery/cloud-client/snippets_test.py +++ b/bigquery/cloud-client/snippets_test.py @@ -62,22 +62,42 @@ def test_list_rows(capsys): assert 'Age' in out -def test_create_table(capsys): +@pytest.fixture +def temporary_table(): + """Fixture that returns a factory for tables that do not yet exist and + will be automatically deleted after the test.""" bigquery_client = bigquery.Client() dataset = bigquery_client.dataset(DATASET_ID) - table = dataset.table('test_create_table') + tables = [] - if table.exists(): - table.delete() + def factory(table_name): + new_table = dataset.table('test_create_table') + if new_table.exists(): + new_table.delete() + tables.append(new_table) + return new_table - try: - snippets.create_table(DATASET_ID, table.name) - assert table.exists() - finally: - table.delete() + yield factory + for table in tables: + if table.exists(): + table.delete() 
-def test_delete_table(capsys): + +def test_create_table(temporary_table): + new_table = temporary_table('test_create_table') + snippets.create_table(DATASET_ID, new_table.name) + assert new_table.exists() + + +@pytest.mark.slow +def test_copy_table(temporary_table): + new_table = temporary_table('test_copy_table') + snippets.copy_table(DATASET_ID, TABLE_ID, new_table.name) + assert new_table.exists() + + +def test_delete_table(): # Create a table to delete bigquery_client = bigquery.Client() dataset = bigquery_client.dataset(DATASET_ID) From 2080c808678f32d91fd04050c5618200fa5e2c82 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 12 Sep 2016 11:04:28 -0700 Subject: [PATCH 3/4] Fix test table ids Change-Id: Ic6132f77754b8bf2f7c7fbfa7d39cca34f8c045a --- bigquery/cloud-client/export_data_to_gcs_test.py | 2 +- bigquery/cloud-client/snippets_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/cloud-client/export_data_to_gcs_test.py b/bigquery/cloud-client/export_data_to_gcs_test.py index e260e47b4f7..acbbe50e55e 100644 --- a/bigquery/cloud-client/export_data_to_gcs_test.py +++ b/bigquery/cloud-client/export_data_to_gcs_test.py @@ -15,7 +15,7 @@ DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' +TABLE_ID = 'test_table' def test_export_data_to_gcs(cloud_config, capsys): diff --git a/bigquery/cloud-client/snippets_test.py b/bigquery/cloud-client/snippets_test.py index 441bad63d7d..67cac0c9408 100644 --- a/bigquery/cloud-client/snippets_test.py +++ b/bigquery/cloud-client/snippets_test.py @@ -19,7 +19,7 @@ DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' +TABLE_ID = 'test_table' @pytest.mark.xfail( From 993ee365ca781be9296d27157b28d9ee5d9bec59 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 12 Sep 2016 12:58:59 -0700 Subject: [PATCH 4/4] Fix usage of table_name Change-Id: I8302ec9ffdd4c1d63156faf9642c63d3547e2d43 --- bigquery/cloud-client/snippets_test.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/bigquery/cloud-client/snippets_test.py b/bigquery/cloud-client/snippets_test.py index 67cac0c9408..372cbc834bf 100644 --- a/bigquery/cloud-client/snippets_test.py +++ b/bigquery/cloud-client/snippets_test.py @@ -71,7 +71,7 @@ def temporary_table(): tables = [] def factory(table_name): - new_table = dataset.table('test_create_table') + new_table = dataset.table(table_name) if new_table.exists(): new_table.delete() tables.append(new_table)