From 6965558f1dd564d7024c113a5fb29fdf08ec4b11 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Mon, 30 Dec 2019 16:58:09 -0600
Subject: [PATCH 1/7] fix(bigquery): write pandas datetime[ns] columns to
 BigQuery TIMESTAMP columns

Also:
* Enable TIMESTAMP and DATETIME unit tests for `_pandas_helpers`.
* Add more data types to load dataframe sample.
---
 .../google/cloud/bigquery/_pandas_helpers.py  |  8 +-
 bigquery/samples/load_table_dataframe.py      | 52 ++++++++++--
 .../tests/test_load_table_dataframe.py        | 67 ++++++++++++++-
 bigquery/tests/unit/test__pandas_helpers.py   | 84 ++++++++++++-------
 4 files changed, 173 insertions(+), 38 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py
index 6e91a9624b06..aa017b904720 100644
--- a/bigquery/google/cloud/bigquery/_pandas_helpers.py
+++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -52,7 +52,11 @@ _PANDAS_DTYPE_TO_BQ = {
     "bool": "BOOLEAN",
     "datetime64[ns, UTC]": "TIMESTAMP",
-    "datetime64[ns]": "DATETIME",
+    # Due to internal bug 147108331, BigQuery always interprets DATETIME
+    # columns as having the wrong precision. In the meantime, work around this
+    # by writing the values as TIMESTAMP. See:
+    # https://github.com/googleapis/google-cloud-python/issues/9996
+    "datetime64[ns]": "TIMESTAMP",
     "float32": "FLOAT",
     "float64": "FLOAT",
     "int8": "INTEGER",
@@ -218,7 +222,7 @@ def bq_to_arrow_array(series, bq_field):
         return pyarrow.ListArray.from_pandas(series, type=arrow_type)
     if field_type_upper in schema._STRUCT_TYPES:
         return pyarrow.StructArray.from_pandas(series, type=arrow_type)
-    return pyarrow.array(series, type=arrow_type)
+    return pyarrow.Array.from_pandas(series, type=arrow_type)
 
 
 def get_column_or_index(dataframe, name):
diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py
index 8cfb34424457..86227164638f 100644
--- a/bigquery/samples/load_table_dataframe.py
+++ b/bigquery/samples/load_table_dataframe.py
@@ -16,9 +16,11 @@ def load_table_dataframe(client, table_id):
 
     # [START bigquery_load_table_dataframe]
-    from google.cloud import bigquery
+    import datetime
 
+    from google.cloud import bigquery
     import pandas
+    import pytz
 
     # TODO(developer): Construct a BigQuery client object.
     # client = bigquery.Client()
@@ -27,16 +29,54 @@ def load_table_dataframe(client, table_id):
     # table_id = "your-project.your_dataset.your_table_name"
 
     records = [
-        {"title": u"The Meaning of Life", "release_year": 1983},
-        {"title": u"Monty Python and the Holy Grail", "release_year": 1975},
-        {"title": u"Life of Brian", "release_year": 1979},
-        {"title": u"And Now for Something Completely Different", "release_year": 1971},
+        {
+            "title": u"The Meaning of Life",
+            "release_year": 1983,
+            "length_minutes": 112.5,
+            "release_date": datetime.datetime(
+                1983, 5, 9, 13, 0, 0, tzinfo=pytz.timezone("Europe/Paris")
+            ),
+            "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
+        },
+        {
+            "title": u"Monty Python and the Holy Grail",
+            "release_year": 1975,
+            "length_minutes": 91.5,
+            "release_date": datetime.datetime(
+                1975, 4, 9, 23, 59, 2, tzinfo=pytz.timezone("Europe/London")
+            ),
+            "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0),
+        },
+        {
+            "title": u"Life of Brian",
+            "release_year": 1979,
+            "length_minutes": 94.25,
+            "release_date": datetime.datetime(
+                1979, 8, 17, 23, 59, 5, tzinfo=pytz.timezone("America/New_York")
+            ),
+            "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0),
+        },
+        {
+            "title": u"And Now for Something Completely Different",
+            "release_year": 1971,
+            "length_minutes": 88.0,
+            "release_date": datetime.datetime(
+                1971, 9, 28, 23, 59, 7, tzinfo=pytz.timezone("Europe/London")
+            ),
+            "dvd_release": datetime.datetime(2003, 10, 22, 10, 0, 0),
+        },
     ]
     dataframe = pandas.DataFrame(
         records,
         # In the loaded table, the column order reflects the order of the
         # columns in the DataFrame.
-        columns=["title", "release_year"],
+        columns=[
+            "title",
+            "release_year",
+            "length_minutes",
+            "release_date",
+            "dvd_release",
+        ],
         # Optionally, set a named index, which can also be written to the
         # BigQuery table.
         index=pandas.Index(
diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 3b7cb16ea692..e5cfdddf08c2 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+
 import pytest
+import pytz
 
 from .. import load_table_dataframe
 
@@ -25,7 +28,67 @@ def test_load_table_dataframe(capsys, client, random_table_id):
 
     table = load_table_dataframe.load_table_dataframe(client, random_table_id)
     out, _ = capsys.readouterr()
-    assert "Loaded 4 rows and 3 columns" in out
+    expected_column_names = [
+        "wikidata_id",
+        "title",
+        "release_year",
+        "length_minutes",
+        "release_date",
+        "dvd_release",
+    ]
+    assert "Loaded 4 rows and {} columns".format(len(expected_column_names)) in out
 
     column_names = [field.name for field in table.schema]
-    assert column_names == ["wikidata_id", "title", "release_year"]
+    assert column_names == expected_column_names
+    column_types = [field.field_type for field in table.schema]
+    assert column_types == [
+        "STRING",
+        "STRING",
+        "INTEGER",
+        "FLOAT",
+        "TIMESTAMP",
+        # Due to internal bug 147108331, BigQuery always interprets DATETIME
+        # columns as having the wrong precision. In the meantime, work around this
+        # by writing the values as TIMESTAMP. See:
+        # https://github.com/googleapis/google-cloud-python/issues/9996
+        "TIMESTAMP",
+    ]
+
+    df = client.list_rows(table).to_dataframe()
+    df.sort_values("release_year", inplace=True)
+    assert df["title"].tolist() == [
+        u"And Now for Something Completely Different",
+        u"Monty Python and the Holy Grail",
+        u"Life of Brian",
+        u"The Meaning of Life",
+    ]
+    assert df["release_year"].tolist() == [
+        1971,
+        1975,
+        1979,
+        1983,
+    ]
+    assert df["length_minutes"].tolist() == [
+        88.0,
+        91.5,
+        94.25,
+        112.5,
+    ]
+    assert df["release_date"].tolist() == [
+        pandas.Timestamp("1971-09-28T23:59:07+00:00"),
+        pandas.Timestamp("1975-04-09T23:59:02+00:00"),
+        pandas.Timestamp("1979-08-17T23:59:05+00:00"),
+        pandas.Timestamp("1983-05-09T13:00:00+00:00"),
+    ]
+    assert df["dvd_release"].tolist() == [
+        pandas.Timestamp("2003-10-22T10:00:00+00:00"),
+        pandas.Timestamp("2002-07-16T09:00:00+00:00"),
+        pandas.Timestamp("2008-01-14T08:00:00+00:00"),
+        pandas.Timestamp("2002-01-22T07:00:00+00:00"),
+    ]
+    assert df["wikidata_id"].tolist() == [
+        u"Q16403",
+        u"Q25043",
+        u"Q24953",
+        u"Q24980",
+    ]
diff --git a/bigquery/tests/unit/test__pandas_helpers.py b/bigquery/tests/unit/test__pandas_helpers.py
index b2d74d54e120..ac613fd50e6f 100644
--- a/bigquery/tests/unit/test__pandas_helpers.py
+++ b/bigquery/tests/unit/test__pandas_helpers.py
@@ -92,6 +92,7 @@ def test_is_datetime():
     assert is_datetime(pyarrow.timestamp("us", tz=None))
     assert not is_datetime(pyarrow.timestamp("ms", tz=None))
     assert not is_datetime(pyarrow.timestamp("us", tz="UTC"))
+    assert not is_datetime(pyarrow.timestamp("ns", tz="UTC"))
     assert not is_datetime(pyarrow.string())
 
 
@@ -386,20 +387,15 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
         ),
         ("BOOLEAN", [True, None, False, None]),
         ("BOOL", [False, None, True, None]),
-        # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is
-        # resolved, test with TIMESTAMP column. Conversion from pyarrow
-        # TimestampArray to list of Python objects fails with OverflowError:
-        # Python int too large to convert to C long.
-        #
-        # (
-        #     "TIMESTAMP",
-        #     [
-        #         datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
-        #         None,
-        #         datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
-        #         datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
-        #     ],
-        # ),
+        (
+            "TIMESTAMP",
+            [
+                datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+                None,
+                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
+                datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+            ],
+        ),
         (
             "DATE",
             [
@@ -418,20 +414,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
             datetime.time(12, 0, 0),
         ],
     ),
-        # TODO: Once https://issues.apache.org/jira/browse/ARROW-5450 is
-        # resolved, test with DATETIME column. Conversion from pyarrow
-        # TimestampArray to list of Python objects fails with OverflowError:
-        # Python int too large to convert to C long.
-        #
-        # (
-        #     "DATETIME",
-        #     [
-        #         datetime.datetime(1, 1, 1, 0, 0, 0),
-        #         None,
-        #         datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
-        #         datetime.datetime(1970, 1, 1, 0, 0, 0),
-        #     ],
-        # ),
+        (
+            "DATETIME",
+            [
+                datetime.datetime(1, 1, 1, 0, 0, 0),
+                datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+                None,
+                datetime.datetime(1970, 1, 1, 0, 0, 0),
+                datetime.datetime(1999, 3, 14, 15, 9, 26, 535898),
+            ],
+        ),
         (
             "GEOGRAPHY",
             [
@@ -453,6 +445,42 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows):
     assert rows == roundtrip
 
 
+@pytest.mark.parametrize(
+    "bq_type,rows",
+    [
+        (
+            "TIMESTAMP",
+            [
+            "1971-09-28T23:59:07+00:00",
+            "1975-04-09T23:59:02+00:00",
+            "1979-08-17T23:59:05+00:00",
+            "NaT",
+            "1983-05-09T13:00:00+00:00",
+            ],
+        ),
+        (
+            "DATETIME",
+            [
+            "1971-09-28T23:59:07",
+            "1975-04-09T23:59:02",
+            "1979-08-17T23:59:05",
+            "NaT",
+            "1983-05-09T13:00:00",
+            ],
+        ),
+    ],
+)
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows):
+    rows = [pandas.Timestamp(row) for row in rows]
+    series = pandas.Series(rows)
+    bq_field = schema.SchemaField("field_name", bq_type)
+    arrow_array = module_under_test.bq_to_arrow_array(series, bq_field)
+    roundtrip = arrow_array.to_pandas()
+    assert series.equals(roundtrip)
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
 def test_bq_to_arrow_array_w_arrays(module_under_test):

From 58ed3dc180fef0f9c343c8fa572b3a781ab8e8ca Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Jan 2020 17:15:37 -0600
Subject: [PATCH 2/7] blacken

---
 bigquery/tests/unit/test__pandas_helpers.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/bigquery/tests/unit/test__pandas_helpers.py b/bigquery/tests/unit/test__pandas_helpers.py
index ac613fd50e6f..6adf098c03c8 100644
--- a/bigquery/tests/unit/test__pandas_helpers.py
+++ b/bigquery/tests/unit/test__pandas_helpers.py
@@ -451,21 +451,21 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows):
         (
             "TIMESTAMP",
             [
-            "1971-09-28T23:59:07+00:00",
-            "1975-04-09T23:59:02+00:00",
-            "1979-08-17T23:59:05+00:00",
-            "NaT",
-            "1983-05-09T13:00:00+00:00",
+                "1971-09-28T23:59:07+00:00",
+                "1975-04-09T23:59:02+00:00",
+                "1979-08-17T23:59:05+00:00",
+                "NaT",
+                "1983-05-09T13:00:00+00:00",
             ],
         ),
         (
             "DATETIME",
             [
-            "1971-09-28T23:59:07",
-            "1975-04-09T23:59:02",
-            "1979-08-17T23:59:05",
-            "NaT",
-            "1983-05-09T13:00:00",
+                "1971-09-28T23:59:07",
+                "1975-04-09T23:59:02",
+                "1979-08-17T23:59:05",
+                "NaT",
+                "1983-05-09T13:00:00",
             ],
         ),
     ],

From de673650f7bb0a554cf89efaa10be797f7ea511d Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Jan 2020 17:16:22 -0600
Subject: [PATCH 3/7] lint

---
 bigquery/samples/tests/test_load_table_dataframe.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index e5cfdddf08c2..86c42dde55d5 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import datetime
-
 import pytest
-import pytz
 
 from .. import load_table_dataframe

From 36fee787290061d7520427f04bbdc6a481a22691 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 2 Jan 2020 17:21:21 -0600
Subject: [PATCH 4/7] update client tests

---
 bigquery/tests/unit/test_client.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index a5100fe6eaef..82fd5cc80c55 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -6389,7 +6389,11 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
             SchemaField("int_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            SchemaField("dt_col", "DATETIME"),
+            # Due to internal bug 147108331, BigQuery always interprets DATETIME
+            # columns as having the wrong precision. In the meantime, work around this
+            # by writing the values as TIMESTAMP. See:
+            # https://github.com/googleapis/google-cloud-python/issues/9996
+            SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
         )
 
@@ -6635,7 +6639,11 @@ def test_load_table_from_dataframe_w_partial_schema(self):
             SchemaField("int_as_float_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            SchemaField("dt_col", "DATETIME"),
+            # Due to internal bug 147108331, BigQuery always interprets DATETIME
+            # columns as having the wrong precision. In the meantime, work around this
+            # by writing the values as TIMESTAMP. See:
+            # https://github.com/googleapis/google-cloud-python/issues/9996
+            SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
             SchemaField("string_col", "STRING"),
             SchemaField("bytes_col", "BYTES"),

From 4fda7f5312eb1780e23a7d09d52926d1a1b6960a Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 3 Jan 2020 09:25:19 -0600
Subject: [PATCH 5/7] doc: show timezone conversions for timestamp columns

Pandas doesn't automatically convert datetime objects to UTC time, so show
how to do this in the code sample.
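
For reference, a minimal sketch of the conversion pattern the sample now uses
(illustrative only, not part of the diff below; it assumes pytz is installed
and uses an arbitrary zone and timestamp):

    import datetime

    import pytz

    naive = datetime.datetime(1983, 5, 9, 13, 0, 0)
    # localize() attaches the zone with the offset that was in effect on that
    # date; passing pytz.timezone(...) directly as tzinfo= would silently use
    # the zone's first recorded (often LMT) offset and produce skewed values.
    utc_value = pytz.timezone("Europe/Paris").localize(naive).astimezone(pytz.utc)
    print(utc_value)  # 1983-05-09 11:00:00+00:00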
---
 bigquery/samples/load_table_dataframe.py      | 24 +++++++-------
 .../tests/test_load_table_dataframe.py        | 29 +++++--------------
 bigquery/tests/system.py                      |  7 ++++-
 3 files changed, 25 insertions(+), 35 deletions(-)

diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py
index 86227164638f..e27ac0aabde6 100644
--- a/bigquery/samples/load_table_dataframe.py
+++ b/bigquery/samples/load_table_dataframe.py
@@ -33,36 +33,36 @@ def load_table_dataframe(client, table_id):
             "title": u"The Meaning of Life",
             "release_year": 1983,
             "length_minutes": 112.5,
-            "release_date": datetime.datetime(
-                1983, 5, 9, 13, 0, 0, tzinfo=pytz.timezone("Europe/Paris")
-            ),
+            "release_date": pytz.timezone("Europe/Paris")
+            .localize(datetime.datetime(1983, 5, 9, 13, 0, 0))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
         },
         {
             "title": u"Monty Python and the Holy Grail",
             "release_year": 1975,
             "length_minutes": 91.5,
-            "release_date": datetime.datetime(
-                1975, 4, 9, 23, 59, 2, tzinfo=pytz.timezone("Europe/London")
-            ),
+            "release_date": pytz.timezone("Europe/London")
+            .localize(datetime.datetime(1975, 4, 9, 23, 59, 2))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2002, 7, 16, 9, 0, 0),
         },
         {
             "title": u"Life of Brian",
             "release_year": 1979,
             "length_minutes": 94.25,
-            "release_date": datetime.datetime(
-                1979, 8, 17, 23, 59, 5, tzinfo=pytz.timezone("America/New_York")
-            ),
+            "release_date": pytz.timezone("America/New_York")
+            .localize(datetime.datetime(1979, 8, 17, 23, 59, 5))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2008, 1, 14, 8, 0, 0),
         },
         {
             "title": u"And Now for Something Completely Different",
             "release_year": 1971,
             "length_minutes": 88.0,
-            "release_date": datetime.datetime(
-                1971, 9, 28, 23, 59, 7, tzinfo=pytz.timezone("Europe/London")
-            ),
+            "release_date": pytz.timezone("Europe/London")
+            .localize(datetime.datetime(1971, 9, 28, 23, 59, 7))
+            .astimezone(pytz.utc),
             "dvd_release": datetime.datetime(2003, 10, 22, 10, 0, 0),
         },
     ]
diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 86c42dde55d5..05025b041ed0 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -59,23 +59,13 @@ def test_load_table_dataframe(capsys, client, random_table_id):
         u"Life of Brian",
         u"The Meaning of Life",
     ]
-    assert df["release_year"].tolist() == [
-        1971,
-        1975,
-        1979,
-        1983,
-    ]
-    assert df["length_minutes"].tolist() == [
-        88.0,
-        91.5,
-        94.25,
-        112.5,
-    ]
+    assert df["release_year"].tolist() == [1971, 1975, 1979, 1983]
+    assert df["length_minutes"].tolist() == [88.0, 91.5, 94.25, 112.5]
     assert df["release_date"].tolist() == [
-        pandas.Timestamp("1971-09-28T23:59:07+00:00"),
-        pandas.Timestamp("1975-04-09T23:59:02+00:00"),
-        pandas.Timestamp("1979-08-17T23:59:05+00:00"),
-        pandas.Timestamp("1983-05-09T13:00:00+00:00"),
+        pandas.Timestamp("1971-09-28T22:59:07+00:00"),
+        pandas.Timestamp("1975-04-09T22:59:02+00:00"),
+        pandas.Timestamp("1979-08-18T03:59:05+00:00"),
+        pandas.Timestamp("1983-05-09T11:00:00+00:00"),
     ]
     assert df["dvd_release"].tolist() == [
         pandas.Timestamp("2003-10-22T10:00:00+00:00"),
@@ -83,9 +73,4 @@ def test_load_table_dataframe(capsys, client, random_table_id):
         pandas.Timestamp("2008-01-14T08:00:00+00:00"),
         pandas.Timestamp("2002-01-22T07:00:00+00:00"),
     ]
-    assert df["wikidata_id"].tolist() == [
-        u"Q16403",
-        u"Q25043",
-        u"Q24953",
-        u"Q24980",
-    ]
+    assert df["wikidata_id"].tolist() == [u"Q16403", u"Q25043", u"Q24953", u"Q24980"]
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index b431f628d001..234b1d925ab1 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -719,7 +719,12 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
         (
             bigquery.SchemaField("bool_col", "BOOLEAN"),
             bigquery.SchemaField("ts_col", "TIMESTAMP"),
-            bigquery.SchemaField("dt_col", "DATETIME"),
+            # Due to internal bug 147108331, BigQuery always interprets
+            # DATETIME columns as having the wrong precision. In the
+            # meantime, work around this by writing the values as TIMESTAMP.
+            # See:
+            # https://github.com/googleapis/google-cloud-python/issues/9996
+            bigquery.SchemaField("dt_col", "TIMESTAMP"),
             bigquery.SchemaField("float32_col", "FLOAT"),
             bigquery.SchemaField("float64_col", "FLOAT"),
             bigquery.SchemaField("int8_col", "INTEGER"),

From f7531e0581fc28e12ab86adc180d4a661e32cb4d Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 9 Jan 2020 11:46:04 -0600
Subject: [PATCH 6/7] doc: update comments to indicate desired use of
 TIMESTAMP

---
 bigquery/google/cloud/bigquery/_pandas_helpers.py   | 6 ++----
 bigquery/samples/load_table_dataframe.py            | 1 +
 bigquery/samples/tests/test_load_table_dataframe.py | 4 ----
 bigquery/tests/system.py                            | 6 ++----
 bigquery/tests/unit/test_client.py                  | 8 --------
 5 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/bigquery/google/cloud/bigquery/_pandas_helpers.py b/bigquery/google/cloud/bigquery/_pandas_helpers.py
index aa017b904720..645478ff6d4b 100644
--- a/bigquery/google/cloud/bigquery/_pandas_helpers.py
+++ b/bigquery/google/cloud/bigquery/_pandas_helpers.py
@@ -52,10 +52,8 @@ _PANDAS_DTYPE_TO_BQ = {
     "bool": "BOOLEAN",
     "datetime64[ns, UTC]": "TIMESTAMP",
-    # Due to internal bug 147108331, BigQuery always interprets DATETIME
-    # columns as having the wrong precision. In the meantime, work around this
-    # by writing the values as TIMESTAMP. See:
-    # https://github.com/googleapis/google-cloud-python/issues/9996
+    # BigQuery does not support uploading DATETIME values from Parquet files.
+    # See: https://github.com/googleapis/google-cloud-python/issues/9996
     "datetime64[ns]": "TIMESTAMP",
     "float32": "FLOAT",
     "float64": "FLOAT",
diff --git a/bigquery/samples/load_table_dataframe.py b/bigquery/samples/load_table_dataframe.py
index 86b6dbaedf98..91dd6e9f09fc 100644
--- a/bigquery/samples/load_table_dataframe.py
+++ b/bigquery/samples/load_table_dataframe.py
@@ -36,6 +36,7 @@ def load_table_dataframe(table_id):
             "release_date": pytz.timezone("Europe/Paris")
             .localize(datetime.datetime(1983, 5, 9, 13, 0, 0))
             .astimezone(pytz.utc),
+            # Assume UTC timezone when a datetime object contains no timezone.
             "dvd_release": datetime.datetime(2002, 1, 22, 7, 0, 0),
         },
         {
diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 3d06d8dd86c5..4ad439719fe5 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -44,10 +44,6 @@ def test_load_table_dataframe(capsys, random_table_id):
         "INTEGER",
         "FLOAT",
         "TIMESTAMP",
-        # Due to internal bug 147108331, BigQuery always interprets DATETIME
-        # columns as having the wrong precision. In the meantime, work around this
-        # by writing the values as TIMESTAMP. See:
-        # https://github.com/googleapis/google-cloud-python/issues/9996
         "TIMESTAMP",
     ]
 
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index 234b1d925ab1..cd72352c29fd 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -719,10 +719,8 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
         (
             bigquery.SchemaField("bool_col", "BOOLEAN"),
             bigquery.SchemaField("ts_col", "TIMESTAMP"),
-            # Due to internal bug 147108331, BigQuery always interprets
-            # DATETIME columns as having the wrong precision. In the
-            # meantime, work around this by writing the values as TIMESTAMP.
-            # See:
+            # BigQuery does not support uploading DATETIME values from
+            # Parquet files. See:
             # https://github.com/googleapis/google-cloud-python/issues/9996
             bigquery.SchemaField("dt_col", "TIMESTAMP"),
             bigquery.SchemaField("float32_col", "FLOAT"),
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index ac93d13fd769..b87ea52a057d 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -6425,10 +6425,6 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
             SchemaField("int_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            # Due to internal bug 147108331, BigQuery always interprets DATETIME
-            # columns as having the wrong precision. In the meantime, work around this
-            # by writing the values as TIMESTAMP. See:
-            # https://github.com/googleapis/google-cloud-python/issues/9996
             SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
         )
@@ -6675,10 +6671,6 @@ def test_load_table_from_dataframe_w_partial_schema(self):
             SchemaField("int_as_float_col", "INTEGER"),
             SchemaField("float_col", "FLOAT"),
             SchemaField("bool_col", "BOOLEAN"),
-            # Due to internal bug 147108331, BigQuery always interprets DATETIME
-            # columns as having the wrong precision. In the meantime, work around this
-            # by writing the values as TIMESTAMP. See:
-            # https://github.com/googleapis/google-cloud-python/issues/9996
             SchemaField("dt_col", "TIMESTAMP"),
             SchemaField("ts_col", "TIMESTAMP"),
             SchemaField("string_col", "STRING"),

From c62a99c2a22e7968e54bf3005ce900651aa9ba18 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 9 Jan 2020 11:48:18 -0600
Subject: [PATCH 7/7] fix: add missing client fixture

---
 bigquery/samples/tests/test_load_table_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigquery/samples/tests/test_load_table_dataframe.py b/bigquery/samples/tests/test_load_table_dataframe.py
index 4ad439719fe5..2286660469ff 100644
--- a/bigquery/samples/tests/test_load_table_dataframe.py
+++ b/bigquery/samples/tests/test_load_table_dataframe.py
@@ -21,7 +21,7 @@
 pyarrow = pytest.importorskip("pyarrow")
 
 
-def test_load_table_dataframe(capsys, random_table_id):
+def test_load_table_dataframe(capsys, client, random_table_id):
     table = load_table_dataframe.load_table_dataframe(random_table_id)
     out, _ = capsys.readouterr()
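
A quick way to check the resulting schema mapping end to end (a sketch, not
part of the patch series; it assumes pandas plus a google-cloud-bigquery
checkout containing this fix, and pokes at the internal dataframe_to_bq_schema
helper, which is not public API):

    import datetime

    import pandas

    from google.cloud.bigquery import _pandas_helpers

    dataframe = pandas.DataFrame(
        {"dvd_release": [datetime.datetime(2002, 1, 22, 7, 0, 0)]}
    )
    # The column dtype is timezone-naive datetime64[ns], which this series
    # now maps to TIMESTAMP instead of DATETIME.
    bq_schema = _pandas_helpers.dataframe_to_bq_schema(dataframe, bq_schema=())
    print([(field.name, field.field_type) for field in bq_schema])
    # Expected: [('dvd_release', 'TIMESTAMP')]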