diff --git a/doc/source/io.rst b/doc/source/io.rst
index ee5734aaf9494..2866371cce61a 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4482,6 +4482,13 @@ destination DataFrame as well as a preferred column order as follows:
You can toggle the verbose output via the ``verbose`` flag which defaults to ``True``.
+.. note::
+
+   The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL
+   or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``. For more information
+   on BigQuery's standard SQL, see `BigQuery SQL Reference
+   <https://cloud.google.com/bigquery/sql-reference/>`__.
+
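+As a brief illustration of the two dialects (the queries below mirror those in this
+change's test suite; ``'my-project-id'`` is a placeholder), note how table names
+are quoted:
+
+.. code-block:: python
+
+   # legacy SQL (the default) quotes table names with brackets
+   df = pd.read_gbq("SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10",
+                    project_id='my-project-id')
+
+   # standard SQL quotes table names with backticks and needs dialect='standard'
+   df = pd.read_gbq("SELECT DISTINCT id FROM `publicdata.samples.wikipedia` LIMIT 10",
+                    project_id='my-project-id', dialect='standard')
+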
.. _io.bigquery_writer:
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 64e6bc0ab307c..07b127cac942a 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -301,6 +301,12 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci
``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`)
+.. _whatsnew_0190.gbq:
+
+Google BigQuery Enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The :func:`pandas.io.gbq.read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the :ref:`docs <io.bigquery_reader>` for more details (:issue:`13615`).
+
.. _whatsnew_0190.enhancements.other:
Other enhancements
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 140f5cc6bb6e3..94def5c265195 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -145,13 +145,14 @@ class GbqConnector(object):
     scope = 'https://www.googleapis.com/auth/bigquery'

     def __init__(self, project_id, reauth=False, verbose=False,
-                 private_key=None):
+                 private_key=None, dialect='legacy'):
         _check_google_client_version()
         _test_google_api_imports()
         self.project_id = project_id
         self.reauth = reauth
         self.verbose = verbose
         self.private_key = private_key
+        self.dialect = dialect
         self.credentials = self.get_credentials()
         self.service = self.get_service()
@@ -334,7 +335,8 @@ def run_query(self, query):
         job_data = {
             'configuration': {
                 'query': {
-                    'query': query
+                    'query': query,
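+                    # 'useLegacySql' maps the dialect argument onto the
+                    # BigQuery API flag: True for 'legacy' (the default),
+                    # False for 'standard'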
+                    'useLegacySql': self.dialect == 'legacy'
                     # 'allowLargeResults', 'createDisposition',
                     # 'preserveNulls', destinationTable, useQueryCache
                 }
@@ -563,7 +565,7 @@ def _parse_entry(field_value, field_type):
 def read_gbq(query, project_id=None, index_col=None, col_order=None,
-             reauth=False, verbose=True, private_key=None):
+             reauth=False, verbose=True, private_key=None, dialect='legacy'):
     """Load data from Google BigQuery.

     THIS IS AN EXPERIMENTAL LIBRARY
@@ -601,6 +603,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
         Service account private key in JSON format. Can be file path
         or string contents. This is useful for remote server
         authentication (eg. jupyter iPython notebook on remote host)
+    dialect : {'legacy', 'standard'}, default 'legacy'
+        'legacy' : Use BigQuery's legacy SQL dialect.
+        'standard' : Use BigQuery's standard SQL (beta), which is
+        compliant with the SQL 2011 standard. For more information
+        see `BigQuery SQL Reference
+        <https://cloud.google.com/bigquery/sql-reference/>`__

     Returns
     -------
@@ -612,8 +620,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
     if not project_id:
         raise TypeError("Missing required parameter: project_id")

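+    # reject unsupported dialect values before any credentials or
+    # network work is done by GbqConnector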
+    if dialect not in ('legacy', 'standard'):
+        raise ValueError("'{0}' is not valid for dialect".format(dialect))
+
     connector = GbqConnector(project_id, reauth=reauth, verbose=verbose,
-                             private_key=private_key)
+                             private_key=private_key,
+                             dialect=dialect)
     schema, pages = connector.run_query(query)
     dataframe_list = []
     while len(pages) > 0:
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index 278c5d7215624..0d8512ffb5524 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -557,6 +557,50 @@ def test_zero_rows(self):
         expected_result = DataFrame(page_array, columns=['title', 'id'])
         self.assert_frame_equal(df, expected_result)

+    def test_legacy_sql(self):
+        legacy_sql = "SELECT id FROM [publicdata.samples.wikipedia] LIMIT 10"
+
+        # Test that a legacy sql statement fails when
+        # setting dialect='standard'
+        with tm.assertRaises(gbq.GenericGBQException):
+            gbq.read_gbq(legacy_sql, project_id=PROJECT_ID,
+                         dialect='standard')
+
+        # Test that a legacy sql statement succeeds when
+        # setting dialect='legacy'
+        df = gbq.read_gbq(legacy_sql, project_id=PROJECT_ID,
+                          dialect='legacy')
+        self.assertEqual(len(df.drop_duplicates()), 10)
+
+    def test_standard_sql(self):
+        standard_sql = "SELECT DISTINCT id FROM " \
+                       "`publicdata.samples.wikipedia` LIMIT 10"
+
+        # Test that a standard sql statement fails when using
+        # the legacy SQL dialect (default value)
+        with tm.assertRaises(gbq.GenericGBQException):
+            gbq.read_gbq(standard_sql, project_id=PROJECT_ID)
+
+        # Test that a standard sql statement succeeds when
+        # setting dialect='standard'
+        df = gbq.read_gbq(standard_sql, project_id=PROJECT_ID,
+                          dialect='standard')
+        self.assertEqual(len(df.drop_duplicates()), 10)
+
+    def test_invalid_option_for_sql_dialect(self):
+        sql_statement = "SELECT DISTINCT id FROM " \
+                        "`publicdata.samples.wikipedia` LIMIT 10"
+
+        # Test that an invalid option for `dialect` raises ValueError
+        with tm.assertRaises(ValueError):
+            gbq.read_gbq(sql_statement, project_id=PROJECT_ID,
+                         dialect='invalid')
+
+        # Test that a correct option for dialect succeeds
+        # to make sure ValueError was due to invalid dialect
+        gbq.read_gbq(sql_statement, project_id=PROJECT_ID,
+                     dialect='standard')
+

 class TestToGBQIntegration(tm.TestCase):
     # Changes to BigQuery table schema may take up to 2 minutes as of May 2015