From 3964a8b7cb3af4082a210289d44298c80e49a3a0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 24 Sep 2019 15:16:58 -0700 Subject: [PATCH] Update pandas/bqstorage samples to latest library changes. --- .../pandas-gbq-migration/requirements.txt | 7 +- bigquery/pandas-gbq-migration/samples_test.py | 71 +++++++++++++++---- bigquery_storage/to_dataframe/jupyter_test.py | 49 +++---------- .../to_dataframe/requirements.txt | 8 +-- noxfile.py | 3 +- 5 files changed, 78 insertions(+), 60 deletions(-) diff --git a/bigquery/pandas-gbq-migration/requirements.txt b/bigquery/pandas-gbq-migration/requirements.txt index 4886f85fa0a..e4c7c1b0c2e 100644 --- a/bigquery/pandas-gbq-migration/requirements.txt +++ b/bigquery/pandas-gbq-migration/requirements.txt @@ -1,2 +1,5 @@ -google-cloud-bigquery[pandas,pyarrow]==1.9.0 -pandas-gbq==0.9.0 +google-cloud-bigquery==1.20.0 +google-cloud-bigquery-storage==0.7.0 +pandas==0.25.1 +pandas-gbq==0.11.0 +pyarrow==0.14.1 diff --git a/bigquery/pandas-gbq-migration/samples_test.py b/bigquery/pandas-gbq-migration/samples_test.py index b7e982f60fa..b237234f6b1 100644 --- a/bigquery/pandas-gbq-migration/samples_test.py +++ b/bigquery/pandas-gbq-migration/samples_test.py @@ -81,6 +81,41 @@ def test_pandas_gbq_query(): assert len(df) > 0 +def test_client_library_query_bqstorage(): + # [START bigquery_migration_client_library_query_bqstorage] + import google.auth + from google.cloud import bigquery + from google.cloud import bigquery_storage_v1beta1 + + # Create a BigQuery client and a BigQuery Storage API client with the same + # credentials to avoid authenticating twice. + credentials, project_id = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) + client = bigquery.Client(credentials=credentials, project=project_id) + bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + credentials=credentials + ) + sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`" + + # Use a BigQuery Storage API client to download results more quickly. + df = client.query(sql).to_dataframe(bqstorage_client=bqstorage_client) + # [END bigquery_migration_client_library_query_bqstorage] + assert len(df) > 0 + + +def test_pandas_gbq_query_bqstorage(): + # [START bigquery_migration_pandas_gbq_query_bqstorage] + import pandas + + sql = "SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012`" + + # Use the BigQuery Storage API to download results more quickly. + df = pandas.read_gbq(sql, dialect='standard', use_bqstorage_api=True) + # [END bigquery_migration_pandas_gbq_query_bqstorage] + assert len(df) > 0 + + def test_client_library_legacy_query(): # [START bigquery_migration_client_library_query_legacy] from google.cloud import bigquery @@ -184,16 +219,28 @@ def test_client_library_upload_from_dataframe(temp_dataset): } ) client = bigquery.Client() - dataset_ref = client.dataset('my_dataset') + table_id = 'my_dataset.new_table' # [END bigquery_migration_client_library_upload_from_dataframe] - dataset_ref = client.dataset(temp_dataset.dataset_id) + table_id = ( + temp_dataset.dataset_id + + ".test_client_library_upload_from_dataframe" + ) # [START bigquery_migration_client_library_upload_from_dataframe] - table_ref = dataset_ref.table('new_table') + # Since string columns use the "object" dtype, pass in a (partial) schema + # to ensure the correct BigQuery data type. + job_config = bigquery.LoadJobConfig(schema=[ + bigquery.SchemaField("my_string", "STRING"), + ]) + + job = client.load_table_from_dataframe( + df, table_id, job_config=job_config + ) - client.load_table_from_dataframe(df, table_ref).result() + # Wait for the load job to complete. + job.result() # [END bigquery_migration_client_library_upload_from_dataframe] client = bigquery.Client() - table = client.get_table(table_ref) + table = client.get_table(table_id) assert table.num_rows == 3 @@ -209,16 +256,16 @@ def test_pandas_gbq_upload_from_dataframe(temp_dataset): 'my_float64': [4.0, 5.0, 6.0], } ) - full_table_id = 'my_dataset.new_table' - project_id = 'my-project-id' + table_id = 'my_dataset.new_table' # [END bigquery_migration_pandas_gbq_upload_from_dataframe] - table_id = 'new_table' - full_table_id = '{}.{}'.format(temp_dataset.dataset_id, table_id) - project_id = os.environ['GCLOUD_PROJECT'] + table_id = ( + temp_dataset.dataset_id + + ".test_pandas_gbq_upload_from_dataframe" + ) # [START bigquery_migration_pandas_gbq_upload_from_dataframe] - df.to_gbq(full_table_id, project_id=project_id) + df.to_gbq(table_id) # [END bigquery_migration_pandas_gbq_upload_from_dataframe] client = bigquery.Client() - table = client.get_table(temp_dataset.table(table_id)) + table = client.get_table(table_id) assert table.num_rows == 3 diff --git a/bigquery_storage/to_dataframe/jupyter_test.py b/bigquery_storage/to_dataframe/jupyter_test.py index ef1b0ddb74f..7997ee2eac1 100644 --- a/bigquery_storage/to_dataframe/jupyter_test.py +++ b/bigquery_storage/to_dataframe/jupyter_test.py @@ -75,9 +75,6 @@ def test_jupyter_small_query(ipython): assert "stackoverflow" in ip.user_ns # verify that variable exists -@pytest.mark.skipif( - "TRAVIS" in os.environ, reason="Not running long-running queries on Travis" -) def test_jupyter_tutorial(ipython): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -86,33 +83,18 @@ def test_jupyter_tutorial(ipython): # speed-up of using the BigQuery Storage API to download the results. sample = """ # [START bigquerystorage_jupyter_tutorial_query] - %%bigquery nodejs_deps --use_bqstorage_api - SELECT - dependency_name, - dependency_platform, - project_name, - project_id, - version_number, - version_id, - dependency_kind, - optional_dependency, - dependency_requirements, - dependency_project_id - FROM - `bigquery-public-data.libraries_io.dependencies` - WHERE - LOWER(dependency_platform) = 'npm' - LIMIT 2500000 + %%bigquery tax_forms --use_bqstorage_api + SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012` # [END bigquerystorage_jupyter_tutorial_query] """ result = ip.run_cell(_strip_region_tags(sample)) result.raise_error() # Throws an exception if the cell failed. - assert "nodejs_deps" in ip.user_ns # verify that variable exists - nodejs_deps = ip.user_ns["nodejs_deps"] + assert "tax_forms" in ip.user_ns # verify that variable exists + tax_forms = ip.user_ns["tax_forms"] # [START bigquerystorage_jupyter_tutorial_results] - nodejs_deps.head() + tax_forms.head() # [END bigquerystorage_jupyter_tutorial_results] # [START bigquerystorage_jupyter_tutorial_context] @@ -123,26 +105,11 @@ def test_jupyter_tutorial(ipython): sample = """ # [START bigquerystorage_jupyter_tutorial_query_default] - %%bigquery java_deps - SELECT - dependency_name, - dependency_platform, - project_name, - project_id, - version_number, - version_id, - dependency_kind, - optional_dependency, - dependency_requirements, - dependency_project_id - FROM - `bigquery-public-data.libraries_io.dependencies` - WHERE - LOWER(dependency_platform) = 'maven' - LIMIT 2500000 + %%bigquery tax_forms + SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012` # [END bigquerystorage_jupyter_tutorial_query_default] """ result = ip.run_cell(_strip_region_tags(sample)) result.raise_error() # Throws an exception if the cell failed. - assert "java_deps" in ip.user_ns # verify that variable exists + assert "tax_forms" in ip.user_ns # verify that variable exists diff --git a/bigquery_storage/to_dataframe/requirements.txt b/bigquery_storage/to_dataframe/requirements.txt index 5dad9dad47d..d5a1d3b5bb7 100644 --- a/bigquery_storage/to_dataframe/requirements.txt +++ b/bigquery_storage/to_dataframe/requirements.txt @@ -1,6 +1,6 @@ google-auth==1.6.2 -google-cloud-bigquery-storage==0.6.0 -google-cloud-bigquery==1.17.0 -pyarrow==0.13.0 +google-cloud-bigquery-storage==0.7.0 +google-cloud-bigquery==1.20.0 +pyarrow==0.14.1 ipython==7.2.0 -pandas==0.24.2 \ No newline at end of file +pandas==0.25.1 \ No newline at end of file diff --git a/noxfile.py b/noxfile.py index d89cddcbb6d..ebc46caa9c3 100644 --- a/noxfile.py +++ b/noxfile.py @@ -167,7 +167,8 @@ def _setup_appengine_sdk(session): PY3_ONLY_SAMPLES = [ sample for sample in ALL_TESTED_SAMPLES if (sample.startswith('./appengine/standard_python37') - or sample.startswith('./functions/'))] + or sample.startswith('./functions/') + or sample.startswith('./bigquery/pandas-gbq-migration'))] NON_GAE_STANDARD_SAMPLES_PY2 = sorted(list(( set(ALL_TESTED_SAMPLES) - set(GAE_STANDARD_SAMPLES)) -