From 9b5787869fafe5f9abe1561d434fe2f11b0b9cfa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 10 Nov 2021 17:05:15 -0600 Subject: [PATCH 1/5] test: upload DATE column with various dtypes --- setup.py | 1 + tests/system/test_to_gbq.py | 75 +++++++++++++++++++++++++++++++++---- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/setup.py b/setup.py index b66c0499..ce02f4e6 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ release_status = "Development Status :: 4 - Beta" dependencies = [ "setuptools", + "db-dtypes >=0.3.0, <2.0.0dev", "numpy>=1.16.6", "pandas>=0.23.2", "pyarrow >=3.0.0, <7.0dev", diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index d16997fd..3d097a98 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -13,6 +13,11 @@ pytest.importorskip("google.cloud.bigquery", minversion="1.24.0") +@pytest.fixture(params=["default", "load_parquet", "load_csv"]) +def api_method(request): + return request.param + + @pytest.fixture def method_under_test(credentials, project_id): import pandas_gbq @@ -47,11 +52,6 @@ def method_under_test(credentials, project_id): [ "abc", "defg", - # Ensure that empty strings are written as empty string, - # not NULL. See: - # https://github.com/googleapis/python-bigquery-pandas/issues/366 - "", - None, # Ensure that unicode characters are encoded. See: # https://github.com/googleapis/python-bigquery-pandas/issues/106 "信用卡", @@ -61,11 +61,27 @@ def method_under_test(credentials, project_id): name="test_col", ), ), + ( + pandas.Series( + [ + "abc", + "defg", + # Ensure that empty strings are written as empty string, + # not NULL. See: + # https://github.com/googleapis/python-bigquery-pandas/issues/366 + "", + None, + ], + name="empty_strings", + ), + ), ], ) def test_series_round_trip( - method_under_test, random_dataset_id, bigquery_client, input_series + method_under_test, random_dataset_id, bigquery_client, input_series, api_method, ): + if api_method == "load_csv" and input_series.name == "empty_strings": + pytest.skip("Loading empty string with CSV not supported.") table_id = f"{random_dataset_id}.round_trip_{random.randrange(1_000_000)}" input_series = input_series.sort_values().reset_index(drop=True) df = pandas.DataFrame( @@ -73,10 +89,53 @@ def test_series_round_trip( # https://github.com/googleapis/python-bigquery-pandas/issues/366 {"test_col": input_series, "test_col2": input_series} ) - method_under_test(df, table_id) + method_under_test(df, table_id, api_method=api_method) round_trip = bigquery_client.list_rows(table_id).to_dataframe() round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True) pandas.testing.assert_series_equal( - round_trip_series, input_series, check_exact=True, + round_trip_series, input_series, check_exact=True, check_names=False, + ) + + +@pytest.mark.parametrize( + ["input_df", "table_schema"], + [ + # Ensure that a DATE column can be written with datetime64[ns] dtype + # data. See: + # https://github.com/googleapis/python-bigquery-pandas/issues/362 + ( + pandas.DataFrame( + { + "date_col": pandas.Series( + ["2021-04-17", "1999-12-31", "2038-01-19"], + dtype="datetime64[ns]", + ), + } + ), + [{"name": "date_col", "type": "DATE"}], + ), + # TODO: Test with dbdate dtype. + ], +) +def test_dataframe_round_trip_with_table_schema( + method_under_test, + random_dataset_id, + bigquery_client, + input_df, + table_schema, + api_method, +): + table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}" + method_under_test( + input_df, table_id, table_schema=table_schema, api_method=api_method + ) + round_trip = bigquery_client.list_rows(table_id).to_dataframe( + dtypes=dict(zip(input_df.columns, input_df.dtypes)) ) + # TODO: Need to sort by row number before comparing. + pandas.testing.assert_frame_equal(input_df, round_trip) + # round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True) + # pandas.testing.assert_series_equal( + # round_trip_series, input_series, check_exact=True, + # ) From 907a1d5d4194666dda856ccd1929c54719a94b30 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 10 Nov 2021 17:38:09 -0600 Subject: [PATCH 2/5] add dbdate tests --- tests/system/test_to_gbq.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index 3d097a98..754469c1 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -28,7 +28,7 @@ def method_under_test(credentials, project_id): @pytest.mark.parametrize( - ["input_series"], + ["input_series", "skip_csv"], [ # Ensure that 64-bit floating point numbers are unchanged. # See: https://github.com/pydata/pandas-gbq/issues/326 @@ -46,6 +46,7 @@ def method_under_test(credentials, project_id): ], name="test_col", ), + False, ), ( pandas.Series( @@ -60,6 +61,7 @@ def method_under_test(credentials, project_id): ], name="test_col", ), + False, ), ( pandas.Series( @@ -74,14 +76,20 @@ def method_under_test(credentials, project_id): ], name="empty_strings", ), + True, ), ], ) def test_series_round_trip( - method_under_test, random_dataset_id, bigquery_client, input_series, api_method, + method_under_test, + random_dataset_id, + bigquery_client, + input_series, + api_method, + skip_csv, ): - if api_method == "load_csv" and input_series.name == "empty_strings": - pytest.skip("Loading empty string with CSV not supported.") + if api_method == "load_csv" and skip_csv: + pytest.skip("Loading with CSV not supported.") table_id = f"{random_dataset_id}.round_trip_{random.randrange(1_000_000)}" input_series = input_series.sort_values().reset_index(drop=True) df = pandas.DataFrame( @@ -99,7 +107,7 @@ def test_series_round_trip( @pytest.mark.parametrize( - ["input_df", "table_schema"], + ["input_df", "table_schema", "skip_csv"], [ # Ensure that a DATE column can be written with datetime64[ns] dtype # data. See: @@ -114,8 +122,19 @@ def test_series_round_trip( } ), [{"name": "date_col", "type": "DATE"}], + True, + ), + ( + pandas.DataFrame( + { + "date_col": pandas.Series( + ["2021-04-17", "1999-12-31", "2038-01-19"], dtype="dbdate", + ), + } + ), + [{"name": "date_col", "type": "DATE"}], + False, ), - # TODO: Test with dbdate dtype. ], ) def test_dataframe_round_trip_with_table_schema( @@ -125,7 +144,10 @@ def test_dataframe_round_trip_with_table_schema( input_df, table_schema, api_method, + skip_csv, ): + if api_method == "load_csv" and skip_csv: + pytest.skip("Loading with CSV not supported.") table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}" method_under_test( input_df, table_id, table_schema=table_schema, api_method=api_method From cf2a5bf7b2ac5543dc02d0ba0258cc02c04b8d40 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 11 Nov 2021 09:52:01 -0600 Subject: [PATCH 3/5] test with db-dtypes only with newer pandas --- noxfile.py | 4 ++-- owlbot.py | 8 +++++-- setup.py | 6 +++-- tests/system/test_to_gbq.py | 47 ++++++++++++++++++++++--------------- 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/noxfile.py b/noxfile.py index ed88b094..9c0b8abf 100644 --- a/noxfile.py +++ b/noxfile.py @@ -28,8 +28,8 @@ BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"] DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] +UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() diff --git a/owlbot.py b/owlbot.py index 76a17e40..71679dd4 100644 --- a/owlbot.py +++ b/owlbot.py @@ -29,12 +29,16 @@ # ---------------------------------------------------------------------------- extras = ["tqdm"] +extras_by_python = { + "3.9": ["tqdm", "db-dtypes"], +} templated_files = common.py_library( - unit_test_python_versions=["3.7", "3.8", "3.9"], - system_test_python_versions=["3.7", "3.8", "3.9"], + unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"], + system_test_python_versions=["3.7", "3.8", "3.9", "3.10"], cov_level=86, unit_test_extras=extras, system_test_extras=extras, + system_test_extras_by_python=extras_by_python, intersphinx_dependencies={ "pandas": "https://pandas.pydata.org/pandas-docs/stable/", "pydata-google-auth": "https://pydata-google-auth.readthedocs.io/en/latest/", diff --git a/setup.py b/setup.py index ce02f4e6..876bd4c0 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ release_status = "Development Status :: 4 - Beta" dependencies = [ "setuptools", - "db-dtypes >=0.3.0, <2.0.0dev", "numpy>=1.16.6", "pandas>=0.23.2", "pyarrow >=3.0.0, <7.0dev", @@ -34,7 +33,10 @@ # https://github.com/pydata/pandas-gbq/issues/343 "google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*", ] -extras = {"tqdm": "tqdm>=4.23.0"} +extras = { + "tqdm": "tqdm>=4.23.0", + "db-dtypes": "db-dtypes >=0.3.0,<2.0.0", +} # Setup boilerplate below this line. diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index 754469c1..bedd7fe9 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -9,6 +9,11 @@ import pandas.testing import pytest +try: + import db_dtypes +except ImportError: + db_dtypes = None + pytest.importorskip("google.cloud.bigquery", minversion="1.24.0") @@ -106,24 +111,24 @@ def test_series_round_trip( ) -@pytest.mark.parametrize( - ["input_df", "table_schema", "skip_csv"], - [ - # Ensure that a DATE column can be written with datetime64[ns] dtype - # data. See: - # https://github.com/googleapis/python-bigquery-pandas/issues/362 - ( - pandas.DataFrame( - { - "date_col": pandas.Series( - ["2021-04-17", "1999-12-31", "2038-01-19"], - dtype="datetime64[ns]", - ), - } - ), - [{"name": "date_col", "type": "DATE"}], - True, +DATAFRAME_ROUND_TRIPS = [ + # Ensure that a DATE column can be written with datetime64[ns] dtype + # data. See: + # https://github.com/googleapis/python-bigquery-pandas/issues/362 + ( + pandas.DataFrame( + { + "date_col": pandas.Series( + ["2021-04-17", "1999-12-31", "2038-01-19"], dtype="datetime64[ns]", + ), + } ), + [{"name": "date_col", "type": "DATE"}], + True, + ), +] +if db_dtypes is not None: + DATAFRAME_ROUND_TRIPS.append( ( pandas.DataFrame( { @@ -134,8 +139,12 @@ def test_series_round_trip( ), [{"name": "date_col", "type": "DATE"}], False, - ), - ], + ) + ) + + +@pytest.mark.parametrize( + ["input_df", "table_schema", "skip_csv"], DATAFRAME_ROUND_TRIPS ) def test_dataframe_round_trip_with_table_schema( method_under_test, From 1ea8fda43c78d915ab2d0367809e10cca33ff970 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 11 Nov 2021 15:55:20 +0000 Subject: [PATCH 4/5] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- CONTRIBUTING.rst | 10 ++++++---- noxfile.py | 6 +++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index bc37b498..90bd84f2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8 and 3.9 on both UNIX and Windows. + 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.9 -- -k + $ nox -s unit-3.10 -- -k .. note:: @@ -143,12 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.9 -- -k + $ nox -s system-3.10 -- -k .. note:: - System tests are only configured to run under Python 3.7, 3.8 and 3.9. + System tests are only configured to run under Python 3.7, 3.8, 3.9 and 3.10. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local @@ -224,10 +224,12 @@ We support: - `Python 3.7`_ - `Python 3.8`_ - `Python 3.9`_ +- `Python 3.10`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ +.. _Python 3.10: https://docs.python.org/3.10/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/noxfile.py b/noxfile.py index 9c0b8abf..825daf18 100644 --- a/noxfile.py +++ b/noxfile.py @@ -146,7 +146,11 @@ def system(session): # Install all test dependencies, then install this package into the # virtualenv's dist-packages. session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) - session.install("-e", ".[tqdm]", "-c", constraints_path) + if session.python == "3.9": + extras = "[tqdm,db-dtypes]" + else: + extras = "[tqdm]" + session.install("-e", f".{extras}", "-c", constraints_path) # Run py.test against the system tests. if system_test_exists: From b869a9f895b9f980afb3396f25212dcf9cb7a41f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 11 Nov 2021 09:57:21 -0600 Subject: [PATCH 5/5] sort by row number --- tests/system/test_to_gbq.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py index bedd7fe9..4f315a77 100644 --- a/tests/system/test_to_gbq.py +++ b/tests/system/test_to_gbq.py @@ -158,15 +158,13 @@ def test_dataframe_round_trip_with_table_schema( if api_method == "load_csv" and skip_csv: pytest.skip("Loading with CSV not supported.") table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}" + input_df["row_num"] = input_df.index + input_df.sort_values("row_num", inplace=True) method_under_test( input_df, table_id, table_schema=table_schema, api_method=api_method ) round_trip = bigquery_client.list_rows(table_id).to_dataframe( dtypes=dict(zip(input_df.columns, input_df.dtypes)) ) - # TODO: Need to sort by row number before comparing. + round_trip.sort_values("row_num", inplace=True) pandas.testing.assert_frame_equal(input_df, round_trip) - # round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True) - # pandas.testing.assert_series_equal( - # round_trip_series, input_series, check_exact=True, - # )