From e1b9d0ed7235d606b019407dbca2e4f5dfd0e4f5 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Sat, 23 Mar 2019 14:28:28 -0700
Subject: [PATCH 1/7] DOC: Document BigQuery to dtype translation for read_gbq

Adds a table documenting the current behavior: pandas 0.24.0 stores
TIMESTAMP columns as a time zone aware dtype, and earlier versions store
them as a naive dtype. I could not figure out how to make 0.24.0+ store
a naive dtype, nor could I figure out how to make earlier versions use a
time zone aware dtype.
---
 docs/source/changelog.rst |  6 ++++
 docs/source/reading.rst   | 74 +++++++++++++++++++++++++++++----------
 pandas_gbq/gbq.py         |  3 ++
 3 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index c2c14cf5..e97f1f3d 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -12,6 +12,12 @@ Changelog
   version. This is required to use new functionality such as the BigQuery
   Storage API. (:issue:`267`)
 
+Documentation
+~~~~~~~~~~~~~
+
+- Document :ref:`BigQuery data type to pandas dtype conversion
+  <reading-dtypes>` for ``read_gbq``. (:issue:`269`)
+
 Dependency updates
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/reading.rst b/docs/source/reading.rst
index add61ed2..803690af 100644
--- a/docs/source/reading.rst
+++ b/docs/source/reading.rst
@@ -9,21 +9,32 @@ Suppose you want to load all data from an existing BigQuery table
 
 .. code-block:: python
 
-    # Insert your BigQuery Project ID Here
-    # Can be found in the Google web console
+    import pandas_gbq
+
+    # TODO: Set your BigQuery Project ID.
     projectid = "xxxxxxxx"
 
-    data_frame = read_gbq('SELECT * FROM test_dataset.test_table', projectid)
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM `test_dataset.test_table`',
+        project_id=projectid)
+
+.. note::
+    A project ID is sometimes optional if it can be inferred during
+    authentication, but it is required when authenticating with user
+    credentials. You can find your project ID in the `Google Cloud console
+    <https://console.cloud.google.com>`__.
 
 You can define which column from BigQuery to use as an index in the
 destination DataFrame as well as a preferred column order as follows:
 
 .. code-block:: python
 
-    data_frame = read_gbq('SELECT * FROM test_dataset.test_table',
-                          index_col='index_column_name',
-                          col_order=['col1', 'col2', 'col3'], projectid)
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM `test_dataset.test_table`',
+        project_id=projectid,
+        index_col='index_column_name',
+        col_order=['col1', 'col2', 'col3'])
 
 You can specify the query config as a parameter to use additional options of
@@ -37,20 +48,45 @@ your job. For more information about query configuration parameters see `here
             "useQueryCache": False
         }
     }
-    data_frame = read_gbq('SELECT * FROM test_dataset.test_table',
-                          configuration=configuration, projectid)
-
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM `test_dataset.test_table`',
+        project_id=projectid,
+        configuration=configuration)
 
-.. note::
-
-    You can find your project id in the `Google developers console
-    <https://console.developers.google.com>`__.
+The ``dialect`` argument can be used to indicate whether to use
+BigQuery's ``'legacy'`` SQL or BigQuery's ``'standard'`` SQL (beta). The
+default value is ``'standard'``. For more information on BigQuery's standard
+SQL, see `BigQuery SQL Reference
+<https://cloud.google.com/bigquery/sql-reference/>`__.
 
-.. note::
+.. code-block:: python
 
-    The ``dialect`` argument can be used to indicate whether to use BigQuery's ``'legacy'`` SQL
-    or BigQuery's ``'standard'`` SQL (beta). The default value is ``'legacy'``, though this will change
-    in a subsequent release to ``'standard'``. For more information
-    on BigQuery's standard SQL, see `BigQuery SQL Reference
-    <https://cloud.google.com/bigquery/sql-reference/>`__
+    data_frame = pandas_gbq.read_gbq(
+        'SELECT * FROM [test_dataset.test_table]',
+        project_id=projectid,
+        dialect='legacy')
+
+
+.. _reading-dtypes:
+
+Inferring the DataFrame's dtypes
+--------------------------------
+
+The :func:`~pandas_gbq.read_gbq` method infers the pandas dtype for each column, based on the BigQuery table schema.
+
+================== =========================
+BigQuery Data Type dtype
+================== =========================
+FLOAT              float
+------------------ -------------------------
+TIMESTAMP          **pandas versions 0.24.0+**
+                   :class:`~pandas.DatetimeTZDtype` with ``unit='ns'`` and
+                   ``tz='UTC'``
+                   **Earlier versions**
+                   datetime64[ns]
+------------------ -------------------------
+DATETIME           datetime64[ns]
+TIME               datetime64[ns]
+DATE               datetime64[ns]
+================== =========================
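A quick way to see the conversions from the table above on a live query
result; this is an illustrative sketch rather than part of the patch, and
``my-project`` is a placeholder project ID:

.. code-block:: python

    import pandas_gbq

    # Illustrative only: inspect the dtypes pandas-gbq inferred.
    df = pandas_gbq.read_gbq(
        "SELECT CURRENT_TIMESTAMP() AS ts, 1.5 AS flt",
        project_id="my-project",  # placeholder
    )
    print(df.dtypes)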
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 17d18263..8b60bad4 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -650,6 +650,9 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     # See:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
+    #
+    # If you update this mapping, also update the table at
+    # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
         # Even though TIMESTAMPs are timezone-aware in BigQuery, pandas doesn't

From e4a2372966997fb8f765de6a97ab75823c7abb98 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 14:29:54 -0700
Subject: [PATCH 2/7] Use tz-aware dtype for TIMESTAMP column on all versions

---
 docs/source/reading.rst  |  8 +-----
 pandas_gbq/gbq.py        | 59 ++++++++++++++++++++++++++++++--------
 tests/system/test_gbq.py | 33 ++++++++++++----------
 tests/unit/test_gbq.py   |  2 +-
 4 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/docs/source/reading.rst b/docs/source/reading.rst
index 803690af..4a7b9d66 100644
--- a/docs/source/reading.rst
+++ b/docs/source/reading.rst
@@ -79,13 +79,7 @@ The :func:`~pandas_gbq.read_gbq` method infers the pandas dtype for each column,
 BigQuery Data Type dtype
 ================== =========================
 FLOAT              float
------------------- -------------------------
-TIMESTAMP          **pandas versions 0.24.0+**
-                   :class:`~pandas.DatetimeTZDtype` with ``unit='ns'`` and
-                   ``tz='UTC'``
-                   **Earlier versions**
-                   datetime64[ns]
------------------- -------------------------
+TIMESTAMP          :class:`~pandas.DatetimeTZDtype` with ``unit='ns'`` and ``tz='UTC'``
 DATETIME           datetime64[ns]
 TIME               datetime64[ns]
 DATE               datetime64[ns]
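The simplified table reflects the new behavior on every supported pandas
version. A minimal sketch of the dtype in question, built here with
``pandas.to_datetime`` (illustrative, not part of the patch):

.. code-block:: python

    import pandas

    # Parse UTC timestamps into a timezone-aware Series; the resulting
    # dtype is the one read_gbq now returns for TIMESTAMP columns.
    s = pandas.to_datetime(pandas.Series(["1970-01-01T00:00:00Z"]), utc=True)
    print(s.dtype)  # datetime64[ns, UTC]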
diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 8b60bad4..30714b49 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -12,6 +12,7 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
+USE_TZAWARE_TIMESTAMP = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
     "private_key is deprecated and will be removed in a future version."
     "Use the credentials argument instead. See "
@@ -26,7 +27,7 @@ def _check_google_client_version():
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
+    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION, USE_TZAWARE_TIMESTAMP
 
     try:
         import pkg_resources
@@ -61,6 +62,12 @@ def _check_google_client_version():
     SHOW_PRIVATE_KEY_DEPRECATION = (
         pandas_installed_version >= pandas_version_with_credentials_arg
     )
+    pandas_version_supporting_tzaware_dtype = pkg_resources.parse_version(
+        "0.24.0"
+    )
+    USE_TZAWARE_TIMESTAMP = (
+        pandas_installed_version >= pandas_version_supporting_tzaware_dtype
+    )
 
 
 def _test_google_api_imports():
@@ -494,6 +501,9 @@ def run_query(self, query, **kwargs):
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
 
+        if not USE_TZAWARE_TIMESTAMP:
+            df = _localize_df(schema_fields, df)
+
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
 
@@ -644,21 +654,28 @@ def delete_and_recreate_table(self, dataset_id, table_id, table_schema):
 
 
 def _bqschema_to_nullsafe_dtypes(schema_fields):
-    # Only specify dtype when the dtype allows nulls. Otherwise, use pandas's
-    # default dtype choice.
-    #
-    # See:
-    # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
-    # #missing-data-casting-rules-and-indexing
-    #
+    """Specify explicit dtypes based on BigQuery schema.
+
+    This function only specifies a dtype when the dtype allows nulls.
+    Otherwise, it uses pandas's default dtype choice.
+
+    See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
+    #missing-data-casting-rules-and-indexing
+    """
+    import pandas.api.types
+
+    # pandas doesn't support timezone-aware dtype in DataFrame/Series
+    # constructors until 0.24.0. See:
+    # https://github.com/pandas-dev/pandas/issues/25843#issuecomment-479656947
+    timestamp_dtype = "datetime64[ns]"
+    if USE_TZAWARE_TIMESTAMP:
+        timestamp_dtype = pandas.api.types.DatetimeTZDtype(unit="ns", tz="UTC")
+
     # If you update this mapping, also update the table at
     # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
-        # Even though TIMESTAMPs are timezone-aware in BigQuery, pandas doesn't
-        # support datetime64[ns, UTC] as dtype in DataFrame constructors. See:
-        # https://github.com/pandas-dev/pandas/issues/12513
-        "TIMESTAMP": "datetime64[ns]",
+        "TIMESTAMP": timestamp_dtype,
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
@@ -705,6 +722,24 @@ def _cast_empty_df_dtypes(schema_fields, df):
     return df
 
 
+def _localize_df(schema_fields, df):
+    """Localize any TIMESTAMP columns to tz-aware type.
+
+    In pandas versions before 0.24.0, DatetimeTZDtype cannot be used as the
+    dtype in Series/DataFrame construction, so localize those columns after
+    the DataFrame is constructed.
+    """
+    for field in schema_fields:
+        column = str(field["name"])
+        if field["mode"].upper() == "REPEATED":
+            continue
+
+        if field["type"].upper() == "TIMESTAMP":
+            df[column] = df[column].dt.tz_localize("UTC")
+
+    return df
+
+
 def read_gbq(
     query,
     project_id=None,
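For pandas versions without tz-aware constructors, ``_localize_df``
converts the columns after the fact. A minimal sketch of that two-step
pattern (illustrative, not part of the patch):

.. code-block:: python

    import pandas

    # Build the column naive, then make it tz-aware afterwards, which is
    # what _localize_df does for TIMESTAMP columns.
    df = pandas.DataFrame({"ts": pandas.to_datetime(["1970-01-01T00:00:00"])})
    df["ts"] = df["ts"].dt.tz_localize("UTC")
    print(df["ts"].dtype)  # datetime64[ns, UTC]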
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 4480f203..427137b9 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -310,13 +310,12 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df,
-            DataFrame(
-                {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
-                dtype="datetime64[ns]",
-            ),
+        expected = DataFrame(
+            {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
+            dtype="datetime64[ns]",
         )
+        expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
+        tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_arbitrary_timestamp(self, project_id):
         query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp'
@@ -326,13 +325,14 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df,
-            DataFrame(
-                {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
-                dtype="datetime64[ns]",
-            ),
+        expected = DataFrame(
+            {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
+            dtype="datetime64[ns]",
         )
+        expected["valid_timestamp"] = expected[
+            "valid_timestamp"
+        ].dt.tz_localize("UTC")
+        tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_datetime_unix_epoch(self, project_id):
         query = 'SELECT DATETIME("1970-01-01 00:00:00") AS unix_epoch'
@@ -368,7 +368,7 @@ def test_should_properly_handle_arbitrary_datetime(self, project_id):
         "expression, is_expected_dtype",
         [
             ("current_date()", pandas.api.types.is_datetime64_ns_dtype),
-            ("current_timestamp()", pandas.api.types.is_datetime64_ns_dtype),
+            ("current_timestamp()", pandas.api.types.is_datetime64tz_dtype),
             ("current_datetime()", pandas.api.types.is_datetime64_ns_dtype),
             ("TRUE", pandas.api.types.is_bool_dtype),
             ("FALSE", pandas.api.types.is_bool_dtype),
@@ -402,9 +402,11 @@ def test_should_properly_handle_null_timestamp(self, project_id):
             credentials=self.credentials,
             dialect="legacy",
         )
-        tm.assert_frame_equal(
-            df, DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]")
+        expected = DataFrame({"null_timestamp": [NaT]}, dtype="datetime64[ns]")
+        expected["null_timestamp"] = expected["null_timestamp"].dt.tz_localize(
+            "UTC"
         )
+        tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_null_datetime(self, project_id):
         query = "SELECT CAST(NULL AS DATETIME) AS null_datetime"
@@ -594,6 +596,7 @@ def test_zero_rows(self, project_id):
         expected_result = DataFrame(
             empty_columns, columns=["title", "id", "is_bot", "ts"]
         )
+        expected_result["ts"] = expected_result["ts"].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected_result, check_index_type=False)
 
     def test_one_row_one_column(self, project_id):
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 3a047741..f0fa7ce6 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -90,7 +90,7 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
-        ("TIMESTAMP", "datetime64[ns]"),
+        ("TIMESTAMP", "datetime64[ns, UTC]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )
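The system tests above lean on pandas's dtype predicates. A short sketch of
how they distinguish tz-aware TIMESTAMP results from naive DATETIME results
(illustrative, not part of the patch):

.. code-block:: python

    import pandas

    aware = pandas.to_datetime(pandas.Series(["2004-09-15T05:00:00"]), utc=True)
    naive = pandas.to_datetime(pandas.Series(["2004-09-15T05:00:00"]))

    # TIMESTAMP results are tz-aware after this change; DATETIME stays naive.
    assert pandas.api.types.is_datetime64tz_dtype(aware)
    assert not pandas.api.types.is_datetime64tz_dtype(naive)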
From daf315e0f446307e9e7d81d3c9d9bb58bffae921 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 14:47:45 -0700
Subject: [PATCH 3/7] Fix unit tests on old pandas

---
 tests/unit/test_gbq.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index f0fa7ce6..267a995e 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -1,21 +1,26 @@
 # -*- coding: utf-8 -*-
 
-import pandas.util.testing as tm
-import pytest
+try:
+    import mock
+except ImportError:  # pragma: NO COVER
+    from unittest import mock
+
 import numpy
 from pandas import DataFrame
+import pandas.util.testing as tm
+import pkg_resources
+import pytest
 
 import pandas_gbq.exceptions
 from pandas_gbq import gbq
 
-try:
-    import mock
-except ImportError:  # pragma: NO COVER
-    from unittest import mock
 
 pytestmark = pytest.mark.filter_warnings(
     "ignore:credentials from Google Cloud SDK"
 )
+pandas_installed_version = pkg_resources.get_distribution(
+    "pandas"
+).parsed_version
 
 
 @pytest.fixture
@@ -90,7 +95,6 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
-        ("TIMESTAMP", "datetime64[ns, UTC]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )
@@ -104,6 +108,16 @@ def test_should_return_bigquery_correctly_typed(type_, expected):
     assert result == {"x": expected}
 
 
+def test_should_return_bigquery_correctly_typed_timestamp():
+    result = gbq._bqschema_to_nullsafe_dtypes(
+        [dict(name="x", type="TIMESTAMP", mode="NULLABLE")]
+    )
+    if pandas_installed_version < pkg_resources.parse_version("0.24.0"):
+        assert result == {"x": "datetime64[ns]"}
+    else:
+        assert result == {"x": "datetime64[ns, UTC]"}
+
+
 def test_to_gbq_should_fail_if_invalid_table_name_passed():
     with pytest.raises(gbq.NotFoundException):
         gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234")
@@ -200,6 +214,10 @@ def test_to_gbq_with_verbose_old_pandas_no_warnings(recwarn, min_bq_version):
     assert len(recwarn) == 0
 
 
+@pytest.mark.skipif(
+    pandas_installed_version < pkg_resources.parse_version("0.24.0"),
+    reason="Requires pandas 0.24+",
+)
 def test_to_gbq_with_private_key_new_pandas_warns_deprecation(
     min_bq_version, monkeypatch
 ):
@@ -413,6 +431,10 @@ def test_read_gbq_with_verbose_old_pandas_no_warnings(recwarn, min_bq_version):
     assert len(recwarn) == 0
 
 
+@pytest.mark.skipif(
+    pandas_installed_version < pkg_resources.parse_version("0.24.0"),
+    reason="Requires pandas 0.24+",
+)
 def test_read_gbq_with_private_key_new_pandas_warns_deprecation(
     min_bq_version, monkeypatch
 ):

From a0fa0a56c18c725c355d0cc43a32a8d752864296 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 14:50:02 -0700
Subject: [PATCH 4/7] Add tz-aware change back to changelog.

---
 docs/source/changelog.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index e97f1f3d..d710b37f 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -33,11 +33,14 @@ Internal changes
 
 Enhancements
 ~~~~~~~~~~~~
+
 - Allow ``table_schema`` in :func:`to_gbq` to contain only a subset of columns,
   with the rest being populated using the DataFrame dtypes (:issue:`218`)
   (contributed by @johnpaton)
 - Read ``project_id`` in :func:`to_gbq` from provided ``credentials`` if
   available (contributed by @daureg)
+- ``read_gbq`` uses the timezone-aware ``DatetimeTZDtype(unit='ns',
+  tz='UTC')`` dtype for BigQuery ``TIMESTAMP`` columns. (:issue:`269`)
 
 .. _changelog-0.9.0:
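The string alias ``"datetime64[ns, UTC]"`` used in the unit tests and the
``DatetimeTZDtype`` named in the changelog entry are the same dtype; a spot
check (assumes pandas 0.24+, illustrative only):

.. code-block:: python

    import pandas

    # The parsed string alias equals the explicitly constructed dtype.
    alias = pandas.api.types.pandas_dtype("datetime64[ns, UTC]")
    explicit = pandas.DatetimeTZDtype(unit="ns", tz="UTC")
    assert alias == explicit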
From 65d31ebccffa4970d57b1c2f4121e6148ce68800 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 15:03:04 -0700
Subject: [PATCH 5/7] Don't localize TIMESTAMP columns if they are already
 tz-aware.

---
 pandas_gbq/gbq.py        | 29 ++++++++---------------------
 tests/system/test_gbq.py |  7 ++++---
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 30714b49..b9978887 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -12,7 +12,6 @@
 BIGQUERY_INSTALLED_VERSION = None
 SHOW_VERBOSE_DEPRECATION = False
 SHOW_PRIVATE_KEY_DEPRECATION = False
-USE_TZAWARE_TIMESTAMP = False
 PRIVATE_KEY_DEPRECATION_MESSAGE = (
     "private_key is deprecated and will be removed in a future version."
     "Use the credentials argument instead. See "
@@ -26,7 +26,7 @@ def _check_google_client_version():
 
 
 def _check_google_client_version():
-    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION, USE_TZAWARE_TIMESTAMP
+    global BIGQUERY_INSTALLED_VERSION, SHOW_VERBOSE_DEPRECATION, SHOW_PRIVATE_KEY_DEPRECATION
 
     try:
         import pkg_resources
@@ -61,12 +61,6 @@ def _check_google_client_version():
     SHOW_PRIVATE_KEY_DEPRECATION = (
         pandas_installed_version >= pandas_version_with_credentials_arg
     )
-    pandas_version_supporting_tzaware_dtype = pkg_resources.parse_version(
-        "0.24.0"
-    )
-    USE_TZAWARE_TIMESTAMP = (
-        pandas_installed_version >= pandas_version_supporting_tzaware_dtype
-    )
 
 
 def _test_google_api_imports():
@@ -501,8 +494,8 @@ def run_query(self, query, **kwargs):
         if df.empty:
             df = _cast_empty_df_dtypes(schema_fields, df)
 
-        if not USE_TZAWARE_TIMESTAMP:
-            df = _localize_df(schema_fields, df)
+        # Ensure any TIMESTAMP columns are tz-aware.
+        df = _localize_df(schema_fields, df)
 
         logger.debug("Got {} rows.\n".format(rows_iter.total_rows))
         return df
@@ -662,20 +655,14 @@ def _bqschema_to_nullsafe_dtypes(schema_fields):
     See: http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     #missing-data-casting-rules-and-indexing
     """
-    import pandas.api.types
-
-    # pandas doesn't support timezone-aware dtype in DataFrame/Series
-    # constructors until 0.24.0. See:
-    # https://github.com/pandas-dev/pandas/issues/25843#issuecomment-479656947
-    timestamp_dtype = "datetime64[ns]"
-    if USE_TZAWARE_TIMESTAMP:
-        timestamp_dtype = pandas.api.types.DatetimeTZDtype(unit="ns", tz="UTC")
-
     # If you update this mapping, also update the table at
     # `docs/source/reading.rst`.
     dtype_map = {
         "FLOAT": np.dtype(float),
-        "TIMESTAMP": timestamp_dtype,
+        # pandas doesn't support timezone-aware dtypes in DataFrame/Series
+        # constructors before 0.24.0; localize after construction instead.
+        # https://github.com/pandas-dev/pandas/issues/25843
+        "TIMESTAMP": "datetime64[ns]",
         "TIME": "datetime64[ns]",
         "DATE": "datetime64[ns]",
         "DATETIME": "datetime64[ns]",
@@ -734,7 +721,7 @@ def _localize_df(schema_fields, df):
         if field["mode"].upper() == "REPEATED":
             continue
 
-        if field["type"].upper() == "TIMESTAMP":
+        if field["type"].upper() == "TIMESTAMP" and df[column].dt.tz is None:
             df[column] = df[column].dt.tz_localize("UTC")
 
     return df
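The new ``df[column].dt.tz is None`` guard makes the localization
idempotent. A minimal sketch of the pattern (illustrative, not part of the
patch):

.. code-block:: python

    import pandas

    s = pandas.to_datetime(pandas.Series(["1970-01-01"]))

    # Localize only when the column is still naive; tz-aware values pass
    # through untouched, so a second pass is a no-op.
    if s.dt.tz is None:
        s = s.dt.tz_localize("UTC")
    assert s.dt.tz is not None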
diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 427137b9..11b10952 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -329,9 +329,10 @@ def test_should_properly_handle_arbitrary_timestamp(self, project_id):
             {"valid_timestamp": ["2004-09-15T05:00:00.000000Z"]},
             dtype="datetime64[ns]",
         )
-        expected["valid_timestamp"] = expected[
-            "valid_timestamp"
-        ].dt.tz_localize("UTC")
+        if expected["valid_timestamp"].dt.tz is None:
+            expected["valid_timestamp"] = expected[
+                "valid_timestamp"
+            ].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_datetime_unix_epoch(self, project_id):

From 7db8b1f91bb0c27fce8ce8a9272510ba3726e887 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 15:05:18 -0700
Subject: [PATCH 6/7] Fix tests

---
 tests/system/test_gbq.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index 11b10952..ce65f4e2 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -314,7 +314,8 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             {"unix_epoch": ["1970-01-01T00:00:00.000000Z"]},
             dtype="datetime64[ns]",
         )
-        expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
+        if expected["unix_epoch"].dt.tz is None:
+            expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_arbitrary_timestamp(self, project_id):

From b933dfce15d09b3d57f48c855c3c56576552b3f6 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 3 Apr 2019 15:46:11 -0700
Subject: [PATCH 7/7] Fix unit test. Blacken

---
 tests/system/test_gbq.py |  4 +++-
 tests/unit/test_gbq.py   | 12 ++----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/tests/system/test_gbq.py b/tests/system/test_gbq.py
index ce65f4e2..6c876068 100644
--- a/tests/system/test_gbq.py
+++ b/tests/system/test_gbq.py
@@ -315,7 +315,9 @@ def test_should_properly_handle_timestamp_unix_epoch(self, project_id):
             dtype="datetime64[ns]",
         )
         if expected["unix_epoch"].dt.tz is None:
-            expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize("UTC")
+            expected["unix_epoch"] = expected["unix_epoch"].dt.tz_localize(
+                "UTC"
+            )
         tm.assert_frame_equal(df, expected)
 
     def test_should_properly_handle_arbitrary_timestamp(self, project_id):
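After this series, the mapping itself stays naive and tz-awareness is added
in ``_localize_df``; a spot check against the internal helper, mirroring the
parametrized unit test below (illustrative only):

.. code-block:: python

    from pandas_gbq import gbq

    # Nullable TIMESTAMP maps to naive datetime64[ns]; localization to
    # UTC happens later in _localize_df.
    dtypes = gbq._bqschema_to_nullsafe_dtypes(
        [{"name": "x", "type": "TIMESTAMP", "mode": "NULLABLE"}]
    )
    assert dtypes == {"x": "datetime64[ns]"}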
diff --git a/tests/unit/test_gbq.py b/tests/unit/test_gbq.py
index 267a995e..6956be20 100644
--- a/tests/unit/test_gbq.py
+++ b/tests/unit/test_gbq.py
@@ -95,6 +95,8 @@ def no_auth(monkeypatch):
         ("INTEGER", None),  # Can't handle NULL
         ("BOOLEAN", None),  # Can't handle NULL
         ("FLOAT", numpy.dtype(float)),
+        # TIMESTAMP will be localized after DataFrame construction.
+        ("TIMESTAMP", "datetime64[ns]"),
         ("DATETIME", "datetime64[ns]"),
     ],
 )
@@ -108,16 +110,6 @@ def test_should_return_bigquery_correctly_typed(type_, expected):
     assert result == {"x": expected}
 
 
-def test_should_return_bigquery_correctly_typed_timestamp():
-    result = gbq._bqschema_to_nullsafe_dtypes(
-        [dict(name="x", type="TIMESTAMP", mode="NULLABLE")]
-    )
-    if pandas_installed_version < pkg_resources.parse_version("0.24.0"):
-        assert result == {"x": "datetime64[ns]"}
-    else:
-        assert result == {"x": "datetime64[ns, UTC]"}
-
-
 def test_to_gbq_should_fail_if_invalid_table_name_passed():
     with pytest.raises(gbq.NotFoundException):
         gbq.to_gbq(DataFrame([[1]]), "invalid_table_name", project_id="1234")
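Taken together, the series means TIMESTAMP columns come back timezone-aware
on every supported pandas version. An end-to-end sketch (assumes valid
credentials; ``my-project`` is a placeholder):

.. code-block:: python

    import pandas_gbq

    df = pandas_gbq.read_gbq(
        "SELECT CURRENT_TIMESTAMP() AS now",
        project_id="my-project",  # placeholder
    )

    # The column is tz-aware (UTC), so converting time zones needs no
    # explicit localize step.
    print(df["now"].dt.tz)
    print(df["now"].dt.tz_convert("America/New_York"))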