From 6965558f1dd564d7024c113a5fb29fdf08ec4b11 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Mon, 30 Dec 2019 16:58:09 -0600
Subject: [PATCH 1/7] fix(bigquery): write pandas datetime[ns] columns to
 BigQuery TIMESTAMP columns

Also:
* Enable TIMESTAMP and DATETIME unit tests for `_pandas_helpers`.
* Add more data types to load dataframe sample.
---
 .../google/cloud/bigquery/_pandas_helpers.py  |  8 +-
 bigquery/samples/load_table_dataframe.py      | 52 ++++++++++--
 .../tests/test_load_table_dataframe.py        | 67 ++++++++++++++-
 bigquery/tests/unit/test__pandas_helpers.py   | 84 ++++++++++++-------
 4 files changed, 173 insertions(+), 38 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py
index 6e91a9624b06..aa017b904720 100644
--- a/bigquery/google/cloud/bigquery/_pandas_helpers.py
+++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -52,7 +52,11 @@ _PANDAS_DTYPE_TO_BQ = {
     "bool": "BOOLEAN",
     "datetime64[ns, UTC]": "TIMESTAMP",
-    "datetime64[ns]": "DATETIME",
+    # Due to internal bug 147108331, BigQuery always interprets DATETIME
+    # columns as having the wrong precision. In the meantime, work around this
+    # by writing the values as TIMESTAMP. See:
+    # https://github.com/googleapis/google-cloud-python/issues/9996
+    "datetime64[ns]": "TIMESTAMP",
     "float32": "FLOAT",
     "float64": "FLOAT",
     "int8": "INTEGER",
@@ -218,7 +222,7 @@ def bq_to_arrow_array(series, bq_field):
         return pyarrow.ListArray.from_pandas(series, type=arrow_type)
     if field_type_upper in schema._STRUCT_TYPES:
         return pyarrow.StructArray.from_pandas(series, type=arrow_type)
-    return pyarrow.array(series, type=arrow_type)
+    return pyarrow.Array.from_pandas(series, type=arrow_type)
 
 
 def get_column_or_index(dataframe, name):
diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py
index 8cfb34424457..86227164638f 100644
--- a/bigquery/samples/load_table_dataframe.py
+++ b/bigquery/samples/load_table_dataframe.py
@@ -16,9 +16,11 @@ def load_table_dataframe(client, table_id):
 
     # [START bigquery_load_table_dataframe]
-    from google.cloud import bigquery
+    import datetime
 
+    from google.cloud import bigquery
     import pandas
+    import pytz
 
     # TODO(developer): Construct a BigQuery client object.
     # client = bigquery.Client()
@@ -27,16 +29,54 @@ def load_table_dataframe(client, table_id):
     # table_id = "your-project.your_dataset.your_table_name"
 
     records = [
-        {"title": u"The Meaning of Life", "release_year": 1983},
-        {"title": u"Monty Python and the Holy Grail", "release_year": 1975},
-        {"title": u"Life of Brian", "release_year": 1979},
-        {"title": u"And Now for Something Completely Different", "release_year": 1971},
+        {
+            "title": u"The Meaning of Life",
+            "release_year": 1983,
+            "length_minutes": 112.5,
+            "release_date": datetime.datetime(
+                1983, 5, 9, 13, 0, 0, tzinfo=pytz.timezone("Europe/Paris")
+            ),
+            "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
+        },
+        {
+            "title": u"Monty Python and the Holy Grail",
+            "release_year": 1975,
+            "length_minutes": 91.5,
+            "release_date": datetime.datetime(
+                1975, 4, 9, 23, 59, 2, tzinfo=pytz.timezone("Europe/London")
+            ),
+            "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0),
+        },
+        {
+            "title": u"Life of Brian",
+            "release_year": 1979,
+            "length_minutes": 94.25,
+            "release_date": datetime.datetime(
+                1979, 8, 17, 23, 59, 5, tzinfo=pytz.timezone("America/New_York")
+            ),
+            "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0),
+        },
+        {
+            "title": u"And Now for Something Completely Different",
+            "release_year": 1971,
+            "length_minutes": 88.0,
+            "release_date": datetime.datetime(
+                1971, 9, 28, 23, 59, 7, tzinfo=pytz.timezone("Europe/London")
+            ),
+            "dvd_release": datetime.datetime(2003, 10, 22, 10, 0, 0),
+        },
     ]
     dataframe = pandas.DataFrame(
         records,
         # In the loaded table, the column order reflects the order of the
         # columns in the DataFrame.
-        columns=["title", "release_year"],
+        columns=[
+            "title",
+            "release_year",
+            "length_minutes",
+            "release_date",
+            "dvd_release",
+        ],
         # Optionally, set a named index, which can also be written to the
         # BigQuery table.
         index=pandas.Index(
diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 3b7cb16ea692..e5cfdddf08c2 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+
 import pytest
+import pytz
 
 from .. import load_table_dataframe
 
@@ -25,7 +28,67 @@ def test_load_table_dataframe(capsys, client, random_table_id):
 
     table = load_table_dataframe.load_table_dataframe(client, random_table_id)
     out, _ = capsys.readouterr()
-    assert "Loaded 4 rows and 3 columns" in out
+    expected_column_names = [
+        "wikidata_id",
+        "title",
+        "release_year",
+        "length_minutes",
+        "release_date",
+        "dvd_release",
+    ]
+    assert "Loaded 4 rows and {} columns".format(len(expected_column_names)) in out
 
     column_names = [field.name for field in table.schema]
-    assert column_names == ["wikidata_id", "title", "release_year"]
+    assert column_names == expected_column_names
+    column_types = [field.field_type for field in table.schema]
+    assert column_types == [
+        "STRING",
+        "STRING",
+        "INTEGER",
+        "FLOAT",
+        "TIMESTAMP",
+        # Due to internal bug 147108331, BigQuery always interprets DATETIME
+        # columns as having the wrong precision. In the meantime, work around this
+        # by writing the values as TIMESTAMP. See:
+        # https://github.com/googleapis/google-cloud-python/issues/9996
+        "TIMESTAMP",
+    ]
+
+    df = client.list_rows(table).to_dataframe()
+    df.sort_values("release_year", inplace=True)
+    assert df["title"].tolist() == [
+        u"And Now for Something Completely Different",
+        u"Monty Python and the Holy Grail",
+        u"Life of Brian",
+        u"The Meaning of Life",
+    ]
+    assert df["release_year"].tolist() == [
+        1971,
+        1975,
+        1979,
+        1983,
+    ]
+    assert df["length_minutes"].tolist() == [
+        88.0,
+        91.5,
+        94.25,
+        112.5,
+    ]
+    assert df["release_date"].tolist() == [
+        pandas.Timestamp("1971-09-28T23:59:07+00:00"),
+        pandas.Timestamp("1975-04-09T23:59:02+00:00"),
+        pandas.Timestamp("1979-08-17T23:59:05+00:00"),
+        pandas.Timestamp("1983-05-09T13:00:00+00:00"),
+    ]
+    assert df["dvd_release"].tolist() == [
+        pandas.Timestamp("2003-10-22T10:00:00+00:00"),
+        pandas.Timestamp("2002-07-16T09:00:00+00:00"),
+        pandas.Timestamp("2008-01-14T08:00:00+00:00"),
+        pandas.Timestamp("2002-01-22T07:00:00+00:00"),
+    ]
+    assert df["wikidata_id"].tolist() == [
+        u"Q16403",
+        u"Q25043",
+        u"Q24953",
+        u"Q24980",
+    ]
diff --git a/bigquery/tests/unit/test__pandas_helpers.py b/bigquery/tests/unit/test__pandas_helpers.py
index b2d74d54e120..ac613fd50e6f 100644
--- a/bigquery/tests/unit/test__pandas_helpers.py
+++ b/bigquery/tests/unit/test__pandas_helpers.py
@@ -92,6 +92,7 @@ def test_is_datetime():
     assert is_datetime(pyarrow.timestamp("us", tz=None))
     assert not is_datetime(pyarrow.timestamp("ms", tz=None))
     assert not is_datetime(pyarrow.timestamp("us", tz="UTC"))
+    assert not is_datetime(pyarrow.timestamp("ns", tz="UTC"))
     assert not is_datetime(pyarrow.string())
 
 
@@ -386,20 +387,15 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
         ),
         ("BOOLEAN", [True, None, False, None]),
         ("BOOL", [False, None, True, None]),
-        # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is
-        # resolved, test with TIMESTAMP column. Conversion from pyarrow
-        # TimestampArray to list of Python objects fails with OverflowError:
-        # Python int too large to convert to C long.
-        #
-        # (
-        #     "TIMESTAMP",
-        #     [
-        #         datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
-        #         None,
-        #         datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
-        #         datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
-        #     ],
-        # ),
+        (
+            "TIMESTAMP",
+            [
+                datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+                None,
+                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
+                datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+            ],
+        ),
         (
             "DATE",
             [
@@ -418,20 +414,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
             datetime.time(12, 0, 0),
         ],
     ),
-        # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is
-        # resolved, test with DATETIME column. Conversion from pyarrow
-        # TimestampArray to list of Python objects fails with OverflowError:
-        # Python int too large to convert to C long.
-        #
-        # (
-        #     "DATETIME",
-        #     [
-        #         datetime.datetime(1, 1, 1, 0, 0, 0),
-        #         None,
-        #         datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
-        #         datetime.datetime(1970, 1, 1, 0, 0, 0),
-        #     ],
-        # ),
+        (
+            "DATETIME",
+            [
+                datetime.datetime(1, 1, 1, 0, 0, 0),
+                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+                None,
+                datetime.datetime(1970, 1, 1, 0, 0, 0),
+                datetime.datetime(1999, 3, 14, 15, 9, 26, 535898),
+            ],
+        ),
         (
             "GEOGRAPHY",
             [
@@ -453,6 +445,42 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows):
     assert rows == roundtrip
 
 
+@pytest.mark.parametrize(
+    "bq_type,rows",
+    [
+        (
+            "TIMESTAMP",
+            [
+            "1971-09-28T23:59:07+00:00",
+            "1975-04-09T23:59:02+00:00",
+            "1979-08-17T23:59:05+00:00",
+            "NaT",
+            "1983-05-09T13:00:00+00:00",
+            ],
+        ),
+        (
+            "DATETIME",
+            [
+            "1971-09-28T23:59:07",
+            "1975-04-09T23:59:02",
+            "1979-08-17T23:59:05",
+            "NaT",
+            "1983-05-09T13:00:00",
+            ],
+        ),
+    ],
+)
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows):
+    rows = [pandas.Timestamp(row) for row in rows]
+    series = pandas.Series(rows)
+    bq_field = schema.SchemaField("field_name", bq_type)
+    arrow_array = module_under_test.bq_to_arrow_array(series, bq_field)
+    roundtrip = arrow_array.to_pandas()
+    assert series.equals(roundtrip)
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
 def test_bq_to_arrow_array_w_arrays(module_under_test):

From 58ed3dc180fef0f9c343c8fa572b3a781ab8e8ca Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Jan 2020 17:15:37 -0600
Subject: [PATCH 2/7] blacken

---
 bigquery/tests/unit/test__pandas_helpers.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/bigquery/tests/unit/test__pandas_helpers.py b/bigquery/tests/unit/test__pandas_helpers.py
index ac613fd50e6f..6adf098c03c8 100644
--- a/bigquery/tests/unit/test__pandas_helpers.py
+++ b/bigquery/tests/unit/test__pandas_helpers.py
@@ -451,21 +451,21 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows):
         (
             "TIMESTAMP",
             [
-            "1971-09-28T23:59:07+00:00",
-            "1975-04-09T23:59:02+00:00",
-            "1979-08-17T23:59:05+00:00",
-            "NaT",
-            "1983-05-09T13:00:00+00:00",
+                "1971-09-28T23:59:07+00:00",
+                "1975-04-09T23:59:02+00:00",
+                "1979-08-17T23:59:05+00:00",
+                "NaT",
+                "1983-05-09T13:00:00+00:00",
             ],
         ),
         (
             "DATETIME",
             [
-            "1971-09-28T23:59:07",
-            "1975-04-09T23:59:02",
-            "1979-08-17T23:59:05",
-            "NaT",
-            "1983-05-09T13:00:00",
+                "1971-09-28T23:59:07",
+                "1975-04-09T23:59:02",
+                "1979-08-17T23:59:05",
+                "NaT",
+                "1983-05-09T13:00:00",
             ],
         ),
     ],

From de673650f7bb0a554cf89efaa10be797f7ea511d Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Jan 2020 17:16:22 -0600
Subject: [PATCH 3/7] lint

---
 bigquery/samples/tests/test_load_table_dataframe.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index e5cfdddf08c2..86c42dde55d5 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import datetime
-
 import pytest
-import pytz
 
 from .. import load_table_dataframe

From 36fee787290061d7520427f04bbdc6a481a22691 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Jan 2020 17:21:21 -0600
Subject: [PATCH 4/7] update client tests

---
 bigquery/tests/unit/test_client.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index a5100fe6eaef..82fd5cc80c55 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -6389,7 +6389,11 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
             SchemaField("int_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            SchemaField("dt_col", "DATETIME"),
+            # Due to internal bug 147108331, BigQuery always interprets DATETIME
+            # columns as having the wrong precision. In the meantime, work around this
+            # by writing the values as TIMESTAMP. See:
+            # https://github.com/googleapis/google-cloud-python/issues/9996
+            SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
         )
 
@@ -6635,7 +6639,11 @@ def test_load_table_from_dataframe_w_partial_schema(self):
             SchemaField("int_as_float_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            SchemaField("dt_col", "DATETIME"),
+            # Due to internal bug 147108331, BigQuery always interprets DATETIME
+            # columns as having the wrong precision. In the meantime, work around this
+            # by writing the values as TIMESTAMP. See:
+            # https://github.com/googleapis/google-cloud-python/issues/9996
+            SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
             SchemaField("string_col", "STRING"),
             SchemaField("bytes_col", "BYTES"),

From 4fda7f5312eb1780e23a7d09d52926d1a1b6960a Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 3 Jan 2020 09:25:19 -0600
Subject: [PATCH 5/7] doc: show timezone conversions for timestamp columns

Pandas doesn't automatically convert datetime objects to UTC time, so show
how to do this in the code sample.
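
For reference, a minimal sketch of the conversion pattern the sample now uses
(illustrative only, not part of the diff below; it assumes pytz is installed
and uses an arbitrary zone and timestamp):

    import datetime

    import pytz

    naive = datetime.datetime(1983, 5, 9, 13, 0, 0)
    # localize() attaches the zone with the offset that was in effect on that
    # date; passing pytz.timezone(...) directly as tzinfo= would silently use
    # the zone's first recorded (often LMT) offset and produce skewed values.
    utc_value = pytz.timezone("Europe/Paris").localize(naive).astimezone(pytz.utc)
    print(utc_value)  # 1983-05-09 11:00:00+00:00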
---
 bigquery/samples/load_table_dataframe.py      | 24 +++++++-------
 .../tests/test_load_table_dataframe.py        | 29 +++++--------------
 bigquery/tests/system.py                      |  7 ++++-
 3 files changed, 25 insertions(+), 35 deletions(-)

diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py
index 86227164638f..e27ac0aabde6 100644
--- a/bigquery/samples/load_table_dataframe.py
+++ b/bigquery/samples/load_table_dataframe.py
@@ -33,36 +33,36 @@ def load_table_dataframe(client, table_id):
             "title": u"The Meaning of Life",
             "release_year": 1983,
             "length_minutes": 112.5,
-            "release_date": datetime.datetime(
-                1983, 5, 9, 13, 0, 0, tzinfo=pytz.timezone("Europe/Paris")
-            ),
+            "release_date": pytz.timezone("Europe/Paris")
+            .localize(datetime.datetime(1983, 5, 9, 13, 0, 0))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
         },
         {
             "title": u"Monty Python and the Holy Grail",
             "release_year": 1975,
             "length_minutes": 91.5,
-            "release_date": datetime.datetime(
-                1975, 4, 9, 23, 59, 2, tzinfo=pytz.timezone("Europe/London")
-            ),
+            "release_date": pytz.timezone("Europe/London")
+            .localize(datetime.datetime(1975, 4, 9, 23, 59, 2))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0),
         },
         {
             "title": u"Life of Brian",
             "release_year": 1979,
             "length_minutes": 94.25,
-            "release_date": datetime.datetime(
-                1979, 8, 17, 23, 59, 5, tzinfo=pytz.timezone("America/New_York")
-            ),
+            "release_date": pytz.timezone("America/New_York")
+            .localize(datetime.datetime(1979, 8, 17, 23, 59, 5))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0),
         },
         {
             "title": u"And Now for Something Completely Different",
             "release_year": 1971,
             "length_minutes": 88.0,
-            "release_date": datetime.datetime(
-                1971, 9, 28, 23, 59, 7, tzinfo=pytz.timezone("Europe/London")
-            ),
+            "release_date": pytz.timezone("Europe/London")
+            .localize(datetime.datetime(1971, 9, 28, 23, 59, 7))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2003, 10, 22, 10, 0, 0),
         },
     ]
diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 86c42dde55d5..05025b041ed0 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -59,23 +59,13 @@ def test_load_table_dataframe(capsys, client, random_table_id):
         u"Life of Brian",
         u"The Meaning of Life",
     ]
-    assert df["release_year"].tolist() == [
-        1971,
-        1975,
-        1979,
-        1983,
-    ]
-    assert df["length_minutes"].tolist() == [
-        88.0,
-        91.5,
-        94.25,
-        112.5,
-    ]
+    assert df["release_year"].tolist() == [1971, 1975, 1979, 1983]
+    assert df["length_minutes"].tolist() == [88.0, 91.5, 94.25, 112.5]
     assert df["release_date"].tolist() == [
-        pandas.Timestamp("1971-09-28T23:59:07+00:00"),
-        pandas.Timestamp("1975-04-09T23:59:02+00:00"),
-        pandas.Timestamp("1979-08-17T23:59:05+00:00"),
-        pandas.Timestamp("1983-05-09T13:00:00+00:00"),
+        pandas.Timestamp("1971-09-28T22:59:07+00:00"),
+        pandas.Timestamp("1975-04-09T22:59:02+00:00"),
+        pandas.Timestamp("1979-08-18T03:59:05+00:00"),
+        pandas.Timestamp("1983-05-09T11:00:00+00:00"),
     ]
     assert df["dvd_release"].tolist() == [
         pandas.Timestamp("2003-10-22T10:00:00+00:00"),
@@ -83,9 +73,4 @@ def test_load_table_dataframe(capsys, client, random_table_id):
         pandas.Timestamp("2008-01-14T08:00:00+00:00"),
         pandas.Timestamp("2002-01-22T07:00:00+00:00"),
     ]
-    assert df["wikidata_id"].tolist() == [
-        u"Q16403",
-        u"Q25043",
-        u"Q24953",
-        u"Q24980",
-    ]
+    assert df["wikidata_id"].tolist() == [u"Q16403", u"Q25043", u"Q24953", u"Q24980"]
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index b431f628d001..234b1d925ab1 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -719,7 +719,12 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
         (
             bigquery.SchemaField("bool_col", "BOOLEAN"),
             bigquery.SchemaField("ts_col", "TIMESTAMP"),
-            bigquery.SchemaField("dt_col", "DATETIME"),
+            # Due to internal bug 147108331, BigQuery always interprets
+            # DATETIME columns as having the wrong precision. In the
+            # meantime, work around this by writing the values as TIMESTAMP.
+            # See:
+            # https://github.com/googleapis/google-cloud-python/issues/9996
+            bigquery.SchemaField("dt_col", "TIMESTAMP"),
             bigquery.SchemaField("float32_col", "FLOAT"),
             bigquery.SchemaField("float64_col", "FLOAT"),
             bigquery.SchemaField("int8_col", "INTEGER"),

From f7531e0581fc28e12ab86adc180d4a661e32cb4d Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 9 Jan 2020 11:46:04 -0600
Subject: [PATCH 6/7] doc: update comments to indicate desired use of
 TIMESTAMP

---
 bigquery/google/cloud/bigquery/_pandas_helpers.py   | 6 ++----
 bigquery/samples/load_table_dataframe.py            | 1 +
 bigquery/samples/tests/test_load_table_dataframe.py | 4 ----
 bigquery/tests/system.py                            | 6 ++----
 bigquery/tests/unit/test_client.py                  | 8 --------
 5 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py
index aa017b904720..645478ff6d4b 100644
--- a/bigquery/google/cloud/bigquery/_pandas_helpers.py
+++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -52,10 +52,8 @@ _PANDAS_DTYPE_TO_BQ = {
     "bool": "BOOLEAN",
     "datetime64[ns, UTC]": "TIMESTAMP",
-    # Due to internal bug 147108331, BigQuery always interprets DATETIME
-    # columns as having the wrong precision. In the meantime, work around this
-    # by writing the values as TIMESTAMP. See:
-    # https://github.com/googleapis/google-cloud-python/issues/9996
+    # BigQuery does not support uploading DATETIME values from Parquet files.
+    # See: https://github.com/googleapis/google-cloud-python/issues/9996
     "datetime64[ns]": "TIMESTAMP",
     "float32": "FLOAT",
     "float64": "FLOAT",
diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py
index 86b6dbaedf98..91dd6e9f09fc 100644
--- a/bigquery/samples/load_table_dataframe.py
+++ b/bigquery/samples/load_table_dataframe.py
@@ -36,6 +36,7 @@ def load_table_dataframe(table_id):
             "release_date": pytz.timezone("Europe/Paris")
             .localize(datetime.datetime(1983, 5, 9, 13, 0, 0))
             .astimezone(pytz.utc),
+            # Assume UTC timezone when a datetime object contains no timezone.
             "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
         },
         {
diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 3d06d8dd86c5..4ad439719fe5 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -44,10 +44,6 @@ def test_load_table_dataframe(capsys, random_table_id):
         "INTEGER",
         "FLOAT",
         "TIMESTAMP",
-        # Due to internal bug 147108331, BigQuery always interprets DATETIME
-        # columns as having the wrong precision. In the meantime, work around this
-        # by writing the values as TIMESTAMP. See:
-        # https://github.com/googleapis/google-cloud-python/issues/9996
         "TIMESTAMP",
     ]
 
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index 234b1d925ab1..cd72352c29fd 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -719,10 +719,8 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
         (
             bigquery.SchemaField("bool_col", "BOOLEAN"),
             bigquery.SchemaField("ts_col", "TIMESTAMP"),
-            # Due to internal bug 147108331, BigQuery always interprets
-            # DATETIME columns as having the wrong precision. In the
-            # meantime, work around this by writing the values as TIMESTAMP.
-            # See:
+            # BigQuery does not support uploading DATETIME values from
+            # Parquet files. See:
             # https://github.com/googleapis/google-cloud-python/issues/9996
             bigquery.SchemaField("dt_col", "TIMESTAMP"),
             bigquery.SchemaField("float32_col", "FLOAT"),
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index ac93d13fd769..b87ea52a057d 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -6425,10 +6425,6 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
             SchemaField("int_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            # Due to internal bug 147108331, BigQuery always interprets DATETIME
-            # columns as having the wrong precision. In the meantime, work around this
-            # by writing the values as TIMESTAMP. See:
-            # https://github.com/googleapis/google-cloud-python/issues/9996
             SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
         )
@@ -6675,10 +6671,6 @@ def test_load_table_from_dataframe_w_partial_schema(self):
             SchemaField("int_as_float_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            # Due to internal bug 147108331, BigQuery always interprets DATETIME
-            # columns as having the wrong precision. In the meantime, work around this
-            # by writing the values as TIMESTAMP. See:
-            # https://github.com/googleapis/google-cloud-python/issues/9996
             SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
             SchemaField("string_col", "STRING"),

From c62a99c2a22e7968e54bf3005ce900651aa9ba18 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 9 Jan 2020 11:48:18 -0600
Subject: [PATCH 7/7] fix: add missing client fixture

---
 bigquery/samples/tests/test_load_table_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 4ad439719fe5..2286660469ff 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -21,7 +21,7 @@
 pyarrow = pytest.importorskip("pyarrow")
 
 
-def test_load_table_dataframe(capsys, random_table_id):
+def test_load_table_dataframe(capsys, client, random_table_id):
     table = load_table_dataframe.load_table_dataframe(random_table_id)
     out, _ = capsys.readouterr()
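
A quick way to check the resulting schema mapping end to end (a sketch, not
part of the patch series; it assumes pandas plus a google-cloud-bigquery
checkout containing this fix, and pokes at the internal dataframe_to_bq_schema
helper, which is not public API):

    import datetime

    import pandas

    from google.cloud.bigquery import _pandas_helpers

    dataframe = pandas.DataFrame(
        {"dvd_release": [datetime.datetime(2002, 1, 22, 7, 0, 0)]}
    )
    # The column dtype is timezone-naive datetime64[ns], which this series
    # now maps to TIMESTAMP instead of DATETIME.
    bq_schema = _pandas_helpers.dataframe_to_bq_schema(dataframe, bq_schema=())
    print([(field.name, field.field_type) for field in bq_schema])
    # Expected: [('dvd_release', 'TIMESTAMP')]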