From c147ba4faf176a5bcd67c9387b4651d4f9238886 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 15 Jul 2015 01:14:55 +0300 Subject: [PATCH 1/5] Add Python 3 support for read_gbq --- pandas/io/gbq.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 06ad8827a5642..c9133df5976c4 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -15,9 +15,6 @@ def _check_google_client_version(): - if compat.PY3: - raise NotImplementedError("Google's libraries do not support Python 3 yet") - try: import pkg_resources @@ -26,8 +23,9 @@ def _check_google_client_version(): _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) < '1.2.0': - raise ImportError("pandas requires google-api-python-client >= 1.2.0 for Google " + required_version = '1.4.0' if compat.PY3 else '1.2.0' + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) < required_version: + raise ImportError("pandas requires google-api-python-client >= " + required_version + " for Google " "BigQuery support, current version " + _GOOGLE_API_CLIENT_VERSION) logger = logging.getLogger('pandas.io.gbq') @@ -267,10 +265,10 @@ def _parse_data(schema, rows): fields = schema['fields'] col_types = [field['type'] for field in fields] - col_names = [field['name'].encode('ascii', 'ignore') for field in fields] + col_names = [field['name'] for field in fields] col_dtypes = [dtype_map.get(field['type'], object) for field in fields] page_array = np.zeros((len(rows),), - dtype=zip(col_names, col_dtypes)) + dtype=list(zip(col_names, col_dtypes))) for row_num, raw_row in enumerate(rows): entries = raw_row.get('f', []) From 80d79a276f4c9b98cac77f148e06fd52ed5e1459 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 15 Jul 2015 01:37:13 +0300 Subject: [PATCH 2/5] Add silent optional parameter for read_gbq --- pandas/io/gbq.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index c9133df5976c4..48b500b0f4aa2 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -131,7 +131,7 @@ def get_service(self, credentials): return bigquery_service - def run_query(self, query): + def run_query(self, query, silent): try: from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError @@ -180,7 +180,8 @@ def run_query(self, query): job_reference = query_reply['jobReference'] while(not query_reply.get('jobComplete', False)): - print('Job not yet complete...') + if not silent: + print('Job not yet complete...') query_reply = job_collection.getQueryResults( projectId=job_reference['projectId'], jobId=job_reference['jobId']).execute() @@ -292,7 +293,7 @@ def _parse_entry(field_value, field_type): return field_value -def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False): +def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, silent = False): """Load data from Google BigQuery. THIS IS AN EXPERIMENTAL LIBRARY @@ -317,6 +318,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals reauth : boolean (default False) Force Google BigQuery to reauthenticate the user. This is useful if multiple accounts are used. + silent : boolean (default False) + Do not print status messages during query execution if True Returns ------- @@ -330,7 +333,7 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=Fals raise TypeError("Missing required parameter: project_id") connector = GbqConnector(project_id, reauth = reauth) - schema, pages = connector.run_query(query) + schema, pages = connector.run_query(query, silent = silent) dataframe_list = [] while len(pages) > 0: page = pages.pop() From e5b8165dca8c293d6a186b77e8ef755dc082e328 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Wed, 15 Jul 2015 01:41:10 +0300 Subject: [PATCH 3/5] Fix typo (add missing "is") --- pandas/io/gbq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 48b500b0f4aa2..87157205591a2 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -181,7 +181,7 @@ def run_query(self, query, silent): while(not query_reply.get('jobComplete', False)): if not silent: - print('Job not yet complete...') + print('Job is not yet complete...') query_reply = job_collection.getQueryResults( projectId=job_reference['projectId'], jobId=job_reference['jobId']).execute() From 7c5b1ee2e9cf72b8d78741ebbfd089a35cb1bf7c Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 27 Jul 2015 19:43:51 +0300 Subject: [PATCH 4/5] Update test_gbq.py --- pandas/io/tests/test_gbq.py | 60 +++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 5417842d3f863..ee3ab316b4d5f 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -39,47 +39,43 @@ def missing_bq(): return True def _test_imports(): - if not compat.PY3: + required_version = '1.4.0' if compat.PY3 else '1.2.0' - global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ - _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED + global _GOOGLE_API_CLIENT_INSTALLED, _GOOGLE_API_CLIENT_VALID_VERSION, \ + _HTTPLIB2_INSTALLED, _SETUPTOOLS_INSTALLED - try: - import pkg_resources - _SETUPTOOLS_INSTALLED = True - except ImportError: - _SETUPTOOLS_INSTALLED = False + try: + import pkg_resources + _SETUPTOOLS_INSTALLED = True + except ImportError: + _SETUPTOOLS_INSTALLED = False - if _SETUPTOOLS_INSTALLED: - try: - from apiclient.discovery import build - from apiclient.errors import HttpError + if _SETUPTOOLS_INSTALLED: + try: + from apiclient.discovery import build + from apiclient.errors import HttpError - from oauth2client.client import OAuth2WebServerFlow - from oauth2client.client import AccessTokenRefreshError + from oauth2client.client import OAuth2WebServerFlow + from oauth2client.client import AccessTokenRefreshError - from oauth2client.file import Storage - from oauth2client.tools import run_flow - _GOOGLE_API_CLIENT_INSTALLED=True - _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version + from oauth2client.file import Storage + from oauth2client.tools import run_flow + _GOOGLE_API_CLIENT_INSTALLED=True + _GOOGLE_API_CLIENT_VERSION = pkg_resources.get_distribution('google-api-python-client').version - if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= '1.2.0': - _GOOGLE_API_CLIENT_VALID_VERSION = True + if LooseVersion(_GOOGLE_API_CLIENT_VERSION) >= required_version: + _GOOGLE_API_CLIENT_VALID_VERSION = True - except ImportError: - _GOOGLE_API_CLIENT_INSTALLED = False + except ImportError: + _GOOGLE_API_CLIENT_INSTALLED = False - try: - import httplib2 - _HTTPLIB2_INSTALLED = True - except ImportError: - _HTTPLIB2_INSTALLED = False + try: + import httplib2 + _HTTPLIB2_INSTALLED = True + except ImportError: + _HTTPLIB2_INSTALLED = False - - if compat.PY3: - raise NotImplementedError("Google's libraries do not support Python 3 yet") - if not _SETUPTOOLS_INSTALLED: raise ImportError('Could not import pkg_resources (setuptools).') @@ -87,7 +83,7 @@ def _test_imports(): raise ImportError('Could not import Google API Client.') if not _GOOGLE_API_CLIENT_VALID_VERSION: - raise ImportError("pandas requires google-api-python-client >= 1.2.0 for Google " + raise ImportError("pandas requires google-api-python-client >= " + required_version + " for Google " "BigQuery support, current version " + _GOOGLE_API_CLIENT_VERSION) if not _HTTPLIB2_INSTALLED: From 45e157b06b38f25e5940cf13ae10c37d395da4e1 Mon Sep 17 00:00:00 2001 From: Alexander Rodin Date: Mon, 27 Jul 2015 20:10:35 +0300 Subject: [PATCH 5/5] Add test case for silent=True option to read_gbq --- pandas/io/tests/test_gbq.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index ee3ab316b4d5f..c9883414da36f 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -9,6 +9,7 @@ import sys import platform from time import sleep +from io import StringIO import numpy as np @@ -291,6 +292,14 @@ def test_download_dataset_larger_than_200k_rows(self): # http://stackoverflow.com/questions/19145587/bq-py-not-paging-results df = gbq.read_gbq("SELECT id FROM [publicdata:samples.wikipedia] GROUP EACH BY id ORDER BY id ASC LIMIT 200005", project_id=PROJECT_ID) self.assertEqual(len(df.drop_duplicates()), 200005) + + def test_silent_option_true(self): + stdout = sys.stdout + sys.stdout = StringIO() + gbq.read_gbq("SELECT 3", project_id = PROJECT_ID, silent = True) + output = sys.stdout.getvalue() + sys.stdout = stdout + tm.assert_equal(output, "") class TestToGBQIntegration(tm.TestCase): # This class requires bq.py to be installed for setup/teardown.