From e1b9d0ed7235d606b019407dbca2e4f5dfd0e4f5 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Sat, 23 Mar 2019 14:28:28 -0700
Subject: [PATCH 1/7] DOC: Document BigQuery to dtype translation for read_gbq

Adds a table documenting the current behavior: pandas 0.24.0 stores
TIMESTAMP columns as a time zone aware dtype, and earlier versions store
them as a naive dtype. I could not figure out how to make 0.24.0+ store
a naive dtype, nor could I figure out how to make earlier versions use a
time zone aware dtype.
---
 docs/source/changelog.rst |  6 ++++
 docs/source/reading.rst   | 74 +++++++++++++++++++++++++++++----------
 pandas_gbq/gbq.py         |  3 ++
 3 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index c2c14cf5..e97f1f3d 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -12,6 +12,12 @@ Changelog
   version. This is required to use new functionality such as the BigQuery
   Storage API. (:issue:`267`)
 
+Documentation
+~~~~~~~~~~~~~
+
+- Document :ref:`BigQuery data type to pandas dtype conversion
+  <reading-dtypes>` for ``read_gbq``. (:issue:`269`)
+
 Dependency updates
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/reading.rst b/docs/source/reading.rst
index add61ed2..803690af 100644
--- a/docs/source/reading.rst
+++ b/docs/source/reading.rst
@@ -9,21 +9,32 @@ Suppose you want to load all data from an existing BigQuery table
 
 .. code-block:: python
 
-    # Insert your BigQuery Project ID Here
-    # Can be found in the Google web console
+    import pandas_gbq
+
+    # TODO: Set your BigQuery Project ID.
     projectid = "xxxxxxxx"
 
-    data_frame = read_gbq('SELECT * FROM test_dataset.test_table', projectid)
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM `test_dataset.test_table`',
+        project_id=projectid)
+
+.. note::
+    A project ID is sometimes optional if it can be inferred during
+    authentication, but it is required when authenticating with user
+    credentials. You can find your project ID in the `Google Cloud console
+    <https://console.cloud.google.com>`__.
 
 You can define which column from BigQuery to use as an index in the
 destination DataFrame as well as a preferred column order as follows:
 
 .. code-block:: python
 
-    data_frame = read_gbq('SELECT * FROM test_dataset.test_table',
-                          index_col='index_column_name',
-                          col_order=['col1', 'col2', 'col3'], projectid)
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM `test_dataset.test_table`',
+        project_id=projectid,
+        index_col='index_column_name',
+        col_order=['col1', 'col2', 'col3'])
 
 You can specify the query config as a parameter to use additional options of
@@ -37,20 +48,45 @@ your job. For more information about query configuration parameters see `here
             "useQueryCache": False
         }
     }
-    data_frame = read_gbq('SELECT * FROM test_dataset.test_table',
-                          configuration=configuration, projectid)
-
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM `test_dataset.test_table`',
+        project_id=projectid,
+        configuration=configuration)
 
-.. note::
-
-    You can find your project id in the `Google developers console
-    <https://console.developers.google.com>`__.
+The ``dialect`` argument can be used to indicate whether to use
+BigQuery's ``'legacy'`` SQL or BigQuery's ``'standard'`` SQL (beta). The
+default value is ``'standard'``. For more information on BigQuery's standard
+SQL, see `BigQuery SQL Reference
+<https://cloud.google.com/bigquery/sql-reference/>`__.
 
-.. note::
+.. code-block:: python
 
-    The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL
-    or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``, though this will change
-    in a subsequent release to ``'standard'``. For more information
-    on BigQuery's standard SQL, see `BigQuery SQL Reference
-    <https://cloud.google.com/bigquery/sql-reference/>`__
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM [test_dataset.test_table]',
+        project_id=projectid,
+        dialect='legacy')
+
+
+.. _reading-dtypes:
+
+Inferring the DataFrame's dtypes
+--------------------------------
+
+The :func:`~pandas_gbq.read_gbq` method infers the pandas dtype for each column, based on the BigQuery table schema.
+
+================== =========================
+BigQuery Data Type dtype
+================== =========================
+FLOAT              float
+------------------ -------------------------
+TIMESTAMP          **pandas versions 0.24.0+**
+                   :class:`~pandas.DatetimeTZDtype` with ``unit='ns'`` and
+                   ``tz='UTC'``
+                   **Earlier versions**
+                   datetime64[ns]
+------------------ -------------------------
+DATETIME           datetime64[ns]
+TIME               datetime64[ns]
+DATE               datetime64[ns]
+================== =========================
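A quick way to see the conversions from the table above on a live query
result; this is an illustrative sketch rather than part of the patch, and
``my-project`` is a placeholder project ID:

.. code-block:: python

    import pandas_gbq

    # Illustrative only: inspect the dtypes pandas-gbq inferred.
    df = pandas_gbq.read_gbq(
        "SELECT CURRENT_TIMESTAMP() AS ts, 1.5 AS flt",
        project_id="my-project",  # placeholder
    )
    print(df.dtypes)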
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 17d18263..8b60bad4 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -650,6 +650,9 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     # See:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
+    #
+    # If you update this mapping, also update the table at
+    # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
         # Even though TIMESTAMPs are timezone-aware in BigQuery, pandas doesn't

From e4a2372966997fb8f765de6a97ab75823c7abb98 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 14:29:54 -0700
Subject: [PATCH 2/7] Use tz-aware dtype for TIMESTAMP column on all versions

---
 docs/source/reading.rst  |  8 +-----
 pandas_gbq/gbq.py        | 59 ++++++++++++++++++++++++++++++--------
 tests/system/test_gbq.py | 33 ++++++++++++----------
 tests/unit/test_gbq.py   |  2 +-
 4 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/docs/source/reading.rst b/docs/source/reading.rst
index 803690af..4a7b9d66 100644
--- a/docs/source/reading.rst
+++ b/docs/source/reading.rst
@@ -79,13 +79,7 @@ The :func:`~pandas_gbq.read_gbq` method infers the pandas dtype for each column,
 BigQuery Data Type dtype
 ================== =========================
 FLOAT              float
------------------- -------------------------
-TIMESTAMP          **pandas versions 0.24.0+**
-                   :class:`~pandas.DatetimeTZDtype` with ``unit='ns'`` and
-                   ``tz='UTC'``
-                   **Earlier versions**
-                   datetime64[ns]
------------------- -------------------------
+TIMESTAMP          :class:`~pandas.DatetimeTZDtype` with ``unit='ns'`` and ``tz='UTC'``
 DATETIME           datetime64[ns]
 TIME               datetime64[ns]
 DATE               datetime64[ns]
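The simplified table reflects the new behavior on every supported pandas
version. A minimal sketch of the dtype in question, built here with
``pandas.to_datetime`` (illustrative, not part of the patch):

.. code-block:: python

    import pandas

    # Parse UTC timestamps into a timezone-aware Series; the resulting
    # dtype is the one read_gbq now returns for TIMESTAMP columns.
    s = pandas.to_datetime(pandas.Series(["1970-01-01T00:00:00Z"]), utc=True)
    print(s.dtype)  # datetime64[ns, UTC]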
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 8b60bad4..30714b49 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -12,6 +12,7 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
+USE_TZAWARE_TIMESTAMP = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
     "private_key is deprecated and will be removed in a future version."
     "Use the credentials argument instead. See "
@@ -26,7 +27,7 @@ def _check_google_client_version():
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
+    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION, USE_TZAWARE_TIMESTAMP
 
     try:
         import pkg_resources
@@ -61,6 +62,12 @@ def _check_google_client_version():
     SHOW_PRIVATE_KEY_DEPRECATION = (
         pandas_installed_version >= pandas_version_with_credentials_arg
     )
+    pandas_version_supporting_tzaware_dtype = pkg_resources.parse_version(
+        "0.24.0"
+    )
+    USE_TZAWARE_TIMESTAMP = (
+        pandas_installed_version >= pandas_version_supporting_tzaware_dtype
+    )
 
 
 def _test_google_api_imports():
@@ -494,6 +501,9 @@ def run_query(self, query, **kwargs):
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
 
+        if not USE_TZAWARE_TIMESTAMP:
+            df = _localize_df(schema_fields, df)
+
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
 
@@ -644,21 +654,28 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
 
 
 def _bqschema_to_nullsafe_dtypes(schema_fields):
-    # Only specify dtype when the dtype allows nulls. Otherwise, use pandas's
-    # default dtype choice.
-    #
-    # See:
-    # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
-    # #missing-data-casting-rules-and-indexing
-    #
+    """Specify explicit dtypes based on BigQuery schema.
+
+    This function only specifies a dtype when the dtype allows nulls.
+    Otherwise, it uses pandas's default dtype choice.
+
+    See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
+    #missing-data-casting-rules-and-indexing
+    """
+    import pandas.api.types
+
+    # pandas doesn't support timezone-aware dtype in DataFrame/Series
+    # constructors until 0.24.0. See:
+    # https://github.com/pandas-dev/pandas/issues/25843#issuecomment-479656947
+    timestamp_dtype = "datetime64[ns]"
+    if USE_TZAWARE_TIMESTAMP:
+        timestamp_dtype = pandas.api.types.DatetimeTZDtype(unit="ns", tz="UTC")
+
     # If you update this mapping, also update the table at
     # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
-        # Even though TIMESTAMPs are timezone-aware in BigQuery, pandas doesn't
-        # support datetime64[ns, UTC] as dtype in DataFrame constructors. See:
-        # https://github.com/pandas-dev/pandas/issues/12513
-        "TIMESTAMP": "datetime64[ns]",
+        "TIMESTAMP": timestamp_dtype,
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
@@ -705,6 +722,24 @@ def _cast_empty_df_dtypes(schema_fields, df):
     return df
 
 
+def _localize_df(schema_fields, df):
+    """Localize any TIMESTAMP columns to tz-aware type.
+
+    In pandas versions before 0.24.0, DatetimeTZDtype cannot be used as the
+    dtype in Series/DataFrame construction, so localize those columns after
+    the DataFrame is constructed.
+    """
+    for field in schema_fields:
+        column = str(field["name"])
+        if field["mode"].upper() == "REPEATED":
+            continue
+
+        if field["type"].upper() == "TIMESTAMP":
+            df[column] = df[column].dt.tz_localize("UTC")
+
+    return df
+
+
 def read_gbq(
     query,
     project_id=None,
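For pandas versions without tz-aware constructors, ``_localize_df``
converts the columns after the fact. A minimal sketch of that two-step
pattern (illustrative, not part of the patch):

.. code-block:: python

    import pandas

    # Build the column naive, then make it tz-aware afterwards, which is
    # what _localize_df does for TIMESTAMP columns.
    df = pandas.DataFrame({"ts": pandas.to_datetime(["1970-01-01T00:00:00"])})
    df["ts"] = df["ts"].dt.tz_localize("UTC")
    print(df["ts"].dtype)  # datetime64[ns, UTC]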
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 4480f203..427137b9 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -310,13 +310,12 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df,
-            DataFrame(
-                {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
-                dtype="datetime64[ns]",
-            ),
+        expected = DataFrame(
+            {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
+            dtype="datetime64[ns]",
         )
+        expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
+        tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_arbitrary_timestamp(self, project_id):
         query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp'
@@ -326,13 +325,14 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df,
-            DataFrame(
-                {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
-                dtype="datetime64[ns]",
-            ),
+        expected = DataFrame(
+            {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
+            dtype="datetime64[ns]",
         )
+        expected["valid_timestamp"] = expected[
+            "valid_timestamp"
+        ].dt.tz_localize("UTC")
+        tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_datetime_unix_epoch(self, project_id):
         query = 'SELECT DATETIME("1970-01-01 00:00:00") AS unix_epoch'
@@ -368,7 +368,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id):
         "expression, is_expected_dtype",
         [
             ("current_date()", pandas.api.types.is_datetime64_ns_dtype),
-            ("current_timestamp()", pandas.api.types.is_datetime64_ns_dtype),
+            ("current_timestamp()", pandas.api.types.is_datetime64tz_dtype),
             ("current_datetime()", pandas.api.types.is_datetime64_ns_dtype),
             ("TRUE", pandas.api.types.is_bool_dtype),
             ("FALSE", pandas.api.types.is_bool_dtype),
@@ -402,9 +402,11 @@ def test_should_properly_handle_null_timestamp(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df, DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]")
+        expected = DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]")
+        expected["null_timestamp"] = expected["null_timestamp"].dt.tz_localize(
+            "UTC"
         )
+        tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_null_datetime(self, project_id):
         query = "SELECT CAST(NULL AS DATETIME) AS null_datetime"
@@ -594,6 +596,7 @@ def test_zero_rows(self, project_id):
         expected_result = DataFrame(
             empty_columns, columns=["title", "id", "is_bot", "ts"]
         )
+        expected_result["ts"] = expected_result["ts"].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected_result, check_index_type=False)
 
     def test_one_row_one_column(self, project_id):
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 3a047741..f0fa7ce6 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -90,7 +90,7 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
-        ("TIMESTAMP", "datetime64[ns]"),
+        ("TIMESTAMP", "datetime64[ns, UTC]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )
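The system tests above lean on pandas's dtype predicates. A short sketch of
how they distinguish tz-aware TIMESTAMP results from naive DATETIME results
(illustrative, not part of the patch):

.. code-block:: python

    import pandas

    aware = pandas.to_datetime(pandas.Series(["2004-09-15T05:00:00"]), utc=True)
    naive = pandas.to_datetime(pandas.Series(["2004-09-15T05:00:00"]))

    # TIMESTAMP results are tz-aware after this change; DATETIME stays naive.
    assert pandas.api.types.is_datetime64tz_dtype(aware)
    assert not pandas.api.types.is_datetime64tz_dtype(naive)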
From daf315e0f446307e9e7d81d3c9d9bb58bffae921 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 14:47:45 -0700
Subject: [PATCH 3/7] Fix unit tests on old pandas

---
 tests/unit/test_gbq.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index f0fa7ce6..267a995e 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -1,21 +1,26 @@
 # -*- coding: utf-8 -*-
 
-import pandas.util.testing as tm
-import pytest
+try:
+    import mock
+except ImportError:  # pragma: NO COVER
+    from unittest import mock
+
 import numpy
 from pandas import DataFrame
+import pandas.util.testing as tm
+import pkg_resources
+import pytest
 
 import pandas_gbq.exceptions
 from pandas_gbq import gbq
 
-try:
-    import mock
-except ImportError:  # pragma: NO COVER
-    from unittest import mock
 
 pytestmark = pytest.mark.filter_warnings(
     "ignore:credentials from Google Cloud SDK"
 )
+pandas_installed_version = pkg_resources.get_distribution(
+    "pandas"
+).parsed_version
 
 
 @pytest.fixture
@@ -90,7 +95,6 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
-        ("TIMESTAMP", "datetime64[ns, UTC]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )
@@ -104,6 +108,16 @@ def test_should_return_bigquery_correctly_typed(type_, expected):
     assert result == {"x": expected}
 
 
+def test_should_return_bigquery_correctly_typed_timestamp():
+    result = gbq._bqschema_to_nullsafe_dtypes(
+        [dict(name="x", type="TIMESTAMP", mode="NULLABLE")]
+    )
+    if pandas_installed_version < pkg_resources.parse_version("0.24.0"):
+        assert result == {"x": "datetime64[ns]"}
+    else:
+        assert result == {"x": "datetime64[ns, UTC]"}
+
+
 def test_to_gbq_should_fail_if_invalid_table_name_passed():
     with pytest.raises(gbq.NotFoundException):
         gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234")
@@ -200,6 +214,10 @@ def test_to_gbq_with_verbose_old_pandas_no_warnings(recwarn, min_bq_version):
     assert len(recwarn) == 0
 
 
+@pytest.mark.skipif(
+    pandas_installed_version < pkg_resources.parse_version("0.24.0"),
+    reason="Requires pandas 0.24+",
+)
 def test_to_gbq_with_private_key_new_pandas_warns_deprecation(
     min_bq_version, monkeypatch
 ):
@@ -413,6 +431,10 @@ def test_read_gbq_with_verbose_old_pandas_no_warnings(recwarn, min_bq_version):
     assert len(recwarn) == 0
 
 
+@pytest.mark.skipif(
+    pandas_installed_version < pkg_resources.parse_version("0.24.0"),
+    reason="Requires pandas 0.24+",
+)
 def test_read_gbq_with_private_key_new_pandas_warns_deprecation(
     min_bq_version, monkeypatch
 ):

From a0fa0a56c18c725c355d0cc43a32a8d752864296 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 14:50:02 -0700
Subject: [PATCH 4/7] Add tz-aware change back to changelog.

---
 docs/source/changelog.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index e97f1f3d..d710b37f 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -33,11 +33,14 @@ Internal changes
 
 Enhancements
 ~~~~~~~~~~~~
+
 - Allow ``table_schema`` in :func:`to_gbq` to contain only a subset of columns,
   with the rest being populated using the DataFrame dtypes (:issue:`218`)
   (contributed by @johnpaton)
 - Read ``project_id`` in :func:`to_gbq` from provided ``credentials`` if
   available (contributed by @daureg)
+- ``read_gbq`` uses the timezone-aware ``DatetimeTZDtype(unit='ns',
+  tz='UTC')`` dtype for BigQuery ``TIMESTAMP`` columns. (:issue:`269`)
 
 .. _changelog-0.9.0:
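The string alias ``"datetime64[ns, UTC]"`` used in the unit tests and the
``DatetimeTZDtype`` named in the changelog entry are the same dtype; a spot
check (assumes pandas 0.24+, illustrative only):

.. code-block:: python

    import pandas

    # The parsed string alias equals the explicitly constructed dtype.
    alias = pandas.api.types.pandas_dtype("datetime64[ns, UTC]")
    explicit = pandas.DatetimeTZDtype(unit="ns", tz="UTC")
    assert alias == explicit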
From 65d31ebccffa4970d57b1c2f4121e6148ce68800 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 15:03:04 -0700
Subject: [PATCH 5/7] Don't localize TIMESTAMP columns if they are already
 tz-aware.

---
 pandas_gbq/gbq.py        | 29 ++++++++---------------------
 tests/system/test_gbq.py |  7 ++++---
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 30714b49..b9978887 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -12,7 +12,6 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
-USE_TZAWARE_TIMESTAMP = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
     "private_key is deprecated and will be removed in a future version."
     "Use the credentials argument instead. See "
@@ -26,7 +26,7 @@ def _check_google_client_version():
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION, USE_TZAWARE_TIMESTAMP
+    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
 
     try:
         import pkg_resources
@@ -61,12 +61,6 @@ def _check_google_client_version():
     SHOW_PRIVATE_KEY_DEPRECATION = (
         pandas_installed_version >= pandas_version_with_credentials_arg
     )
-    pandas_version_supporting_tzaware_dtype = pkg_resources.parse_version(
-        "0.24.0"
-    )
-    USE_TZAWARE_TIMESTAMP = (
-        pandas_installed_version >= pandas_version_supporting_tzaware_dtype
-    )
 
 
 def _test_google_api_imports():
@@ -501,8 +494,8 @@ def run_query(self, query, **kwargs):
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
 
-        if not USE_TZAWARE_TIMESTAMP:
-            df = _localize_df(schema_fields, df)
+        # Ensure any TIMESTAMP columns are tz-aware.
+        df = _localize_df(schema_fields, df)
 
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
@@ -662,20 +655,14 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     #missing-data-casting-rules-and-indexing
     """
-    import pandas.api.types
-
-    # pandas doesn't support timezone-aware dtype in DataFrame/Series
-    # constructors until 0.24.0. See:
-    # https://github.com/pandas-dev/pandas/issues/25843#issuecomment-479656947
-    timestamp_dtype = "datetime64[ns]"
-    if USE_TZAWARE_TIMESTAMP:
-        timestamp_dtype = pandas.api.types.DatetimeTZDtype(unit="ns", tz="UTC")
-
     # If you update this mapping, also update the table at
     # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
-        "TIMESTAMP": timestamp_dtype,
+        # pandas doesn't support timezone-aware dtypes in DataFrame/Series
+        # constructors before 0.24.0; localize after construction instead.
+        # https://github.com/pandas-dev/pandas/issues/25843
+        "TIMESTAMP": "datetime64[ns]",
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
@@ -734,7 +721,7 @@ def _localize_df(schema_fields, df):
         if field["mode"].upper() == "REPEATED":
             continue
 
-        if field["type"].upper() == "TIMESTAMP":
+        if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None:
             df[column] = df[column].dt.tz_localize("UTC")
 
     return df
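The new ``df[column].dt.tz is None`` guard makes the localization
idempotent. A minimal sketch of the pattern (illustrative, not part of the
patch):

.. code-block:: python

    import pandas

    s = pandas.to_datetime(pandas.Series(["1970-01-01"]))

    # Localize only when the column is still naive; tz-aware values pass
    # through untouched, so a second pass is a no-op.
    if s.dt.tz is None:
        s = s.dt.tz_localize("UTC")
    assert s.dt.tz is not None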
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 427137b9..11b10952 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -329,9 +329,10 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
             dtype="datetime64[ns]",
         )
-        expected["valid_timestamp"] = expected[
-            "valid_timestamp"
-        ].dt.tz_localize("UTC")
+        if expected["valid_timestamp"].dt.tz is None:
+            expected["valid_timestamp"] = expected[
+                "valid_timestamp"
+            ].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_datetime_unix_epoch(self, project_id):

From 7db8b1f91bb0c27fce8ce8a9272510ba3726e887 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 15:05:18 -0700
Subject: [PATCH 6/7] Fix tests

---
 tests/system/test_gbq.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 11b10952..ce65f4e2 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -314,7 +314,8 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
             dtype="datetime64[ns]",
         )
-        expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
+        if expected["unix_epoch"].dt.tz is None:
+            expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_arbitrary_timestamp(self, project_id):

From b933dfce15d09b3d57f48c855c3c56576552b3f6 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 15:46:11 -0700
Subject: [PATCH 7/7] Fix unit test. Blacken

---
 tests/system/test_gbq.py |  4 +++-
 tests/unit/test_gbq.py   | 12 ++----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index ce65f4e2..6c876068 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -315,7 +315,9 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             dtype="datetime64[ns]",
         )
         if expected["unix_epoch"].dt.tz is None:
-            expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
+            expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize(
+                "UTC"
+            )
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_arbitrary_timestamp(self, project_id):
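After this series, the mapping itself stays naive and tz-awareness is added
in ``_localize_df``; a spot check against the internal helper, mirroring the
parametrized unit test below (illustrative only):

.. code-block:: python

    from pandas_gbq import gbq

    # Nullable TIMESTAMP maps to naive datetime64[ns]; localization to
    # UTC happens later in _localize_df.
    dtypes = gbq._bqschema_to_nullsafe_dtypes(
        [{"name": "x", "type": "TIMESTAMP", "mode": "NULLABLE"}]
    )
    assert dtypes == {"x": "datetime64[ns]"}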
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 267a995e..6956be20 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -95,6 +95,8 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
+        # TIMESTAMP will be localized after DataFrame construction.
+        ("TIMESTAMP", "datetime64[ns]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )
@@ -108,16 +110,6 @@ def test_should_return_bigquery_correctly_typed(type_, expected):
     assert result == {"x": expected}
 
 
-def test_should_return_bigquery_correctly_typed_timestamp():
-    result = gbq._bqschema_to_nullsafe_dtypes(
-        [dict(name="x", type="TIMESTAMP", mode="NULLABLE")]
-    )
-    if pandas_installed_version < pkg_resources.parse_version("0.24.0"):
-        assert result == {"x": "datetime64[ns]"}
-    else:
-        assert result == {"x": "datetime64[ns, UTC]"}
-
-
 def test_to_gbq_should_fail_if_invalid_table_name_passed():
     with pytest.raises(gbq.NotFoundException):
         gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234")
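Taken together, the series means TIMESTAMP columns come back timezone-aware
on every supported pandas version. An end-to-end sketch (assumes valid
credentials; ``my-project`` is a placeholder):

.. code-block:: python

    import pandas_gbq

    df = pandas_gbq.read_gbq(
        "SELECT CURRENT_TIMESTAMP() AS now",
        project_id="my-project",  # placeholder
    )

    # The column is tz-aware (UTC), so converting time zones needs no
    # explicit localize step.
    print(df["now"].dt.tz)
    print(df["now"].dt.tz_convert("America/New_York"))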