From 53549c7f95877326302c72f1ac39dd96a31dd7ea Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Fri, 29 Dec 2017 23:56:18 -0500 Subject: [PATCH 1/2] Fix array bug in type conversion; add array and struct tests --- pandas_gbq/gbq.py | 5 ++-- pandas_gbq/tests/test_gbq.py | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 46a246e5..77efe100 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -867,11 +867,12 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None, ) # cast BOOLEAN and INTEGER columns from object to bool/int - # if they dont have any nulls + # if they dont have any nulls AND field mode is not repeated (i.e., array) type_map = {'BOOLEAN': bool, 'INTEGER': int} for field in schema['fields']: if field['type'].upper() in type_map and \ - final_df[field['name']].notnull().all(): + final_df[field['name']].notnull().all() and \ + field['mode'] != 'repeated': final_df[field['name']] = \ final_df[field['name']].astype(type_map[field['type'].upper()]) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 6a2b8480..75274d97 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -920,6 +920,56 @@ def test_query_response_bytes(self): assert self.gbq_connector.sizeof_fmt(1.208926E24) == "1.0 YB" assert self.gbq_connector.sizeof_fmt(1.208926E28) == "10000.0 YB" + def test_struct(self): + query = """SELECT 1 int_field, + STRUCT("a" as letter, 1 as num) struct_field""" + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path(), + dialect='standard') + tm.assert_frame_equal(df, DataFrame([[1, {"letter": "a", "num": 1}]], + columns=["int_field", "struct_field"])) + + def test_array(self): + query = """select ["a","x","b","y","c","z"] as letters""" + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path(), + dialect='standard') + 
tm.assert_frame_equal(df, DataFrame([[["a", "x", "b", "y", "c", "z"]]], + columns=["letters"])) + + def test_array_length_zero(self): + query = """WITH t as ( + SELECT "a" letter, [""] as array_field + UNION ALL + SELECT "b" letter, [] as array_field) + + select letter, array_field, array_length(array_field) len + from t + order by letter ASC""" + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path(), + dialect='standard') + tm.assert_frame_equal(df, DataFrame([["a", [""], 1], ["b", [], 0]], + columns=["letter", "array_field", "len"])) + + def test_array_agg(self): + query = """WITH t as ( + SELECT "a" letter, 1 num + UNION ALL + SELECT "b" letter, 2 num + UNION ALL + SELECT "a" letter, 3 num) + + select letter, array_agg(num order by num ASC) numbers + from t + group by letter + order by letter ASC""" + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path(), + dialect='standard') + tm.assert_frame_equal(df, DataFrame([["a", [1, 3]], ["b", [2]]], + columns=["letter", "numbers"])) + class TestToGBQIntegrationWithServiceAccountKeyPath(object): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 From f45919761cad89da7ce57039bf57905dda5137a2 Mon Sep 17 00:00:00 2001 From: Jason Ng Date: Tue, 2 Jan 2018 13:07:48 -0500 Subject: [PATCH 2/2] Update changelog --- docs/source/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index f755dc80..78c4d6e4 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -5,6 +5,7 @@ Changelog ------------------ - Use the `google-cloud-bigquery `__ library for API calls. The ``google-cloud-bigquery`` package is a new dependency, and dependencies on ``google-api-python-client`` and ``httplib2`` are removed. See the `installation guide `__ for more details. 
(:issue:`93`)
+- Structs and arrays are now named properly (:issue:`23`) and BigQuery functions like ``array_agg`` no longer run into errors during type conversion (:issue:`22`).
 - :func:`to_gbq` now uses a load job instead of the streaming API. Remove ``StreamingInsertError`` class, as it is no longer used by :func:`to_gbq`. (:issue:`7`, :issue:`75`)
 
 0.2.1 / 2017-11-27