test: upload DATE column with various dtypes (#420)

tswast · gcf-owl-bot[bot] · web-flow · commit 3e70975f956e · 2021-11-16T16:55:58.000-06:00
* test: upload DATE column with various dtypes
* add dbdate tests
* test with db-dtypes only with newer pandas
* 🦉 Updates from OwlBot

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md
* sort by row number

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
@@ -22,7 +22,7 @@ In order to add a feature:
   documentation.
 
 - The feature must work fully on the following CPython versions:
-  3.7, 3.8 and 3.9 on both UNIX and Windows.
+  3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows.
 
 - The feature must not add unnecessary dependencies (where
   "unnecessary" is of course subjective, but new dependencies should
@@ -72,7 +72,7 @@ We use `nox <https://nox.readthedocs.io/en/latest/>`__ to instrument our tests.
 
 - To run a single unit test::
 
-    $ nox -s unit-3.9 -- -k <name of test>
+    $ nox -s unit-3.10 -- -k <name of test>
 
 
   .. note::
@@ -143,12 +143,12 @@ Running System Tests
    $ nox -s system
 
    # Run a single system test
-   $ nox -s system-3.9 -- -k <name of test>
+   $ nox -s system-3.10 -- -k <name of test>
 
 
   .. note::
 
-      System tests are only configured to run under Python 3.7, 3.8 and 3.9.
+      System tests are only configured to run under Python 3.7, 3.8, 3.9 and 3.10.
       For expediency, we do not run them in older versions of Python 3.
 
   This alone will not run the tests. You'll need to change some local
@@ -224,10 +224,12 @@ We support:
 -  `Python 3.7`_
 -  `Python 3.8`_
 -  `Python 3.9`_
+-  `Python 3.10`_
 
 .. _Python 3.7: https://docs.python.org/3.7/
 .. _Python 3.8: https://docs.python.org/3.8/
 .. _Python 3.9: https://docs.python.org/3.9/
+.. _Python 3.10: https://docs.python.org/3.10/
 
 
 Supported versions can be found in our ``noxfile.py`` `config`_.
diff --git a/noxfile.py b/noxfile.py
@@ -28,8 +28,8 @@
 BLACK_PATHS = ["docs", "pandas_gbq", "tests", "noxfile.py", "setup.py"]
 
 DEFAULT_PYTHON_VERSION = "3.8"
-SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
-UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9"]
+SYSTEM_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
 
 CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
 
@@ -146,7 +146,11 @@ def system(session):
     # Install all test dependencies, then install this package into the
     # virtualenv's dist-packages.
     session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path)
-    session.install("-e", ".[tqdm]", "-c", constraints_path)
+    if session.python == "3.9":
+        extras = "[tqdm,db-dtypes]"
+    else:
+        extras = "[tqdm]"
+    session.install("-e", f".{extras}", "-c", constraints_path)
 
     # Run py.test against the system tests.
     if system_test_exists:
diff --git a/owlbot.py b/owlbot.py
@@ -29,12 +29,16 @@
 # ----------------------------------------------------------------------------
 
 extras = ["tqdm"]
+extras_by_python = {
+    "3.9": ["tqdm", "db-dtypes"],
+}
 templated_files = common.py_library(
-    unit_test_python_versions=["3.7", "3.8", "3.9"],
-    system_test_python_versions=["3.7", "3.8", "3.9"],
+    unit_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
+    system_test_python_versions=["3.7", "3.8", "3.9", "3.10"],
     cov_level=86,
     unit_test_extras=extras,
     system_test_extras=extras,
+    system_test_extras_by_python=extras_by_python,
     intersphinx_dependencies={
         "pandas": "https://pandas.pydata.org/pandas-docs/stable/",
         "pydata-google-auth": "https://pydata-google-auth.readthedocs.io/en/latest/",
diff --git a/setup.py b/setup.py
@@ -33,7 +33,10 @@
     # https://github.com/pydata/pandas-gbq/issues/343
     "google-cloud-bigquery[bqstorage,pandas]>=1.11.1,<3.0.0dev,!=2.4.*",
 ]
-extras = {"tqdm": "tqdm>=4.23.0"}
+extras = {
+    "tqdm": "tqdm>=4.23.0",
+    "db-dtypes": "db-dtypes >=0.3.0,<2.0.0",
+}
 
 # Setup boilerplate below this line.
 
diff --git a/tests/system/test_to_gbq.py b/tests/system/test_to_gbq.py
@@ -9,10 +9,20 @@
 import pandas.testing
 import pytest
 
+try:
+    import db_dtypes
+except ImportError:
+    db_dtypes = None
+
 
 pytest.importorskip("google.cloud.bigquery", minversion="1.24.0")
 
 
+@pytest.fixture(params=["default", "load_parquet", "load_csv"])
+def api_method(request):
+    return request.param
+
+
 @pytest.fixture
 def method_under_test(credentials, project_id):
     import pandas_gbq
@@ -23,7 +33,7 @@ def method_under_test(credentials, project_id):
 
 
 @pytest.mark.parametrize(
-    ["input_series"],
+    ["input_series", "skip_csv"],
     [
         # Ensure that 64-bit floating point numbers are unchanged.
         # See: https://github.com/pydata/pandas-gbq/issues/326
@@ -41,17 +51,13 @@ def method_under_test(credentials, project_id):
                 ],
                 name="test_col",
             ),
+            False,
         ),
         (
             pandas.Series(
                 [
                     "abc",
                     "defg",
-                    # Ensure that empty strings are written as empty string,
-                    # not NULL. See:
-                    # https://github.com/googleapis/python-bigquery-pandas/issues/366
-                    "",
-                    None,
                     # Ensure that unicode characters are encoded. See:
                     # https://github.com/googleapis/python-bigquery-pandas/issues/106
                     "信用卡",
@@ -60,23 +66,105 @@ def method_under_test(credentials, project_id):
                 ],
                 name="test_col",
             ),
+            False,
+        ),
+        (
+            pandas.Series(
+                [
+                    "abc",
+                    "defg",
+                    # Ensure that empty strings are written as empty string,
+                    # not NULL. See:
+                    # https://github.com/googleapis/python-bigquery-pandas/issues/366
+                    "",
+                    None,
+                ],
+                name="empty_strings",
+            ),
+            True,
         ),
     ],
 )
 def test_series_round_trip(
-    method_under_test, random_dataset_id, bigquery_client, input_series
+    method_under_test,
+    random_dataset_id,
+    bigquery_client,
+    input_series,
+    api_method,
+    skip_csv,
 ):
+    if api_method == "load_csv" and skip_csv:
+        pytest.skip("Loading with CSV not supported.")
     table_id = f"{random_dataset_id}.round_trip_{random.randrange(1_000_000)}"
     input_series = input_series.sort_values().reset_index(drop=True)
     df = pandas.DataFrame(
         # Some errors only occur in multi-column dataframes. See:
         # https://github.com/googleapis/python-bigquery-pandas/issues/366
         {"test_col": input_series, "test_col2": input_series}
     )
-    method_under_test(df, table_id)
+    method_under_test(df, table_id, api_method=api_method)
 
     round_trip = bigquery_client.list_rows(table_id).to_dataframe()
     round_trip_series = round_trip["test_col"].sort_values().reset_index(drop=True)
     pandas.testing.assert_series_equal(
-        round_trip_series, input_series, check_exact=True,
+        round_trip_series, input_series, check_exact=True, check_names=False,
+    )
+
+
+DATAFRAME_ROUND_TRIPS = [
+    # Ensure that a DATE column can be written with datetime64[ns] dtype
+    # data. See:
+    # https://github.com/googleapis/python-bigquery-pandas/issues/362
+    (
+        pandas.DataFrame(
+            {
+                "date_col": pandas.Series(
+                    ["2021-04-17", "1999-12-31", "2038-01-19"], dtype="datetime64[ns]",
+                ),
+            }
+        ),
+        [{"name": "date_col", "type": "DATE"}],
+        True,
+    ),
+]
+if db_dtypes is not None:
+    DATAFRAME_ROUND_TRIPS.append(
+        (
+            pandas.DataFrame(
+                {
+                    "date_col": pandas.Series(
+                        ["2021-04-17", "1999-12-31", "2038-01-19"], dtype="dbdate",
+                    ),
+                }
+            ),
+            [{"name": "date_col", "type": "DATE"}],
+            False,
+        )
+    )
+
+
+@pytest.mark.parametrize(
+    ["input_df", "table_schema", "skip_csv"], DATAFRAME_ROUND_TRIPS
+)
+def test_dataframe_round_trip_with_table_schema(
+    method_under_test,
+    random_dataset_id,
+    bigquery_client,
+    input_df,
+    table_schema,
+    api_method,
+    skip_csv,
+):
+    if api_method == "load_csv" and skip_csv:
+        pytest.skip("Loading with CSV not supported.")
+    table_id = f"{random_dataset_id}.round_trip_w_schema_{random.randrange(1_000_000)}"
+    input_df["row_num"] = input_df.index
+    input_df.sort_values("row_num", inplace=True)
+    method_under_test(
+        input_df, table_id, table_schema=table_schema, api_method=api_method
+    )
+    round_trip = bigquery_client.list_rows(table_id).to_dataframe(
+        dtypes=dict(zip(input_df.columns, input_df.dtypes))
     )
+    round_trip.sort_values("row_num", inplace=True)
+    pandas.testing.assert_frame_equal(input_df, round_trip)