From 14a2dae5a55ae329ad764aad1c568b7320197257 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Sat, 8 Jun 2024 20:01:41 +0000
Subject: [PATCH 1/2] chore: clean up type errors in
 tests/system/small/test_dataframe.py

---
 tests/system/small/test_dataframe.py | 117 ++++++++++++---------------
 1 file changed, 52 insertions(+), 65 deletions(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index ffc09a1a1f..3e50485fc8 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -17,7 +17,7 @@
 import sys
 import tempfile
 import typing
-from typing import Tuple
+from typing import Dict, List, Tuple
 
 import geopandas as gpd  # type: ignore
 import numpy as np
@@ -146,9 +146,9 @@ def test_df_construct_inline_respects_location():
     with bpd.option_context("bigquery.location", "europe-west1"):
         df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
         repr(df)
+        assert df.query_job is not None
+        table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)
 
-        # TODO(b/340876936): fix type error
-        table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)  # type: ignore
         assert table.location == "europe-west1"
 
 
@@ -753,10 +753,9 @@ def test_assign_listlike_to_empty_df(session):
 def test_assign_to_empty_df_multiindex_error(session):
     empty_df = dataframe.DataFrame(session=session)
     empty_pandas_df = pd.DataFrame()
-    # TODO(b/340876936): fix type error
-    empty_df["empty_col_1"] = []  # type: ignore
-    # TODO(b/340876936): fix type error
-    empty_df["empty_col_2"] = []  # type: ignore
+
+    empty_df["empty_col_1"] = typing.cast(series.Series, [])
+    empty_df["empty_col_2"] = typing.cast(series.Series, [])
     empty_pandas_df["empty_col_1"] = []
     empty_pandas_df["empty_col_2"] = []
     empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"])
@@ -1340,40 +1339,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
 
 def test_get_dtypes(scalars_df_default_index):
     dtypes = scalars_df_default_index.dtypes
+    dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
+        "bool_col": pd.BooleanDtype(),
+        "bytes_col": pd.ArrowDtype(pa.binary()),
+        "date_col": pd.ArrowDtype(pa.date32()),
+        "datetime_col": pd.ArrowDtype(pa.timestamp("us")),
+        "geography_col": gpd.array.GeometryDtype(),
+        "int64_col": pd.Int64Dtype(),
+        "int64_too": pd.Int64Dtype(),
+        "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)),
+        "float64_col": pd.Float64Dtype(),
+        "rowindex": pd.Int64Dtype(),
+        "rowindex_2": pd.Int64Dtype(),
+        "string_col": pd.StringDtype(storage="pyarrow"),
+        "time_col": pd.ArrowDtype(pa.time64("us")),
+        "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
+    }
     pd.testing.assert_series_equal(
         dtypes,
-        pd.Series(
-            {
-                # TODO(b/340876936): fix type error
-                "bool_col": pd.BooleanDtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "bytes_col": pd.ArrowDtype(pa.binary()),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "date_col": pd.ArrowDtype(pa.date32()),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "datetime_col": pd.ArrowDtype(pa.timestamp("us")),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "geography_col": gpd.array.GeometryDtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "int64_col": pd.Int64Dtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "int64_too": pd.Int64Dtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "float64_col": pd.Float64Dtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "rowindex": pd.Int64Dtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "rowindex_2": pd.Int64Dtype(),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "string_col": pd.StringDtype(storage="pyarrow"),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "time_col": pd.ArrowDtype(pa.time64("us")),  # type: ignore
-                # TODO(b/340876936): fix type error
-                "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),  # type: ignore
-            }
-        ),
+        pd.Series(dtypes_dict),
     )
 
 
@@ -1828,10 +1812,9 @@ def test_df_update(overwrite, filter_func):
     if pd.__version__.startswith("1."):
         pytest.skip("dtype handled differently in pandas 1.x.")
 
-    # TODO(b/340876936): fix type error
-    index1 = pandas.Index([1, 2, 3, 4], dtype="Int64")  # type: ignore
-    # TODO(b/340876936): fix type error
-    index2 = pandas.Index([1, 2, 4, 5], dtype="Int64")  # type: ignore
+    index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
+
+    index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
     pd_df1 = pandas.DataFrame(
         {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
     )
@@ -1891,10 +1874,10 @@ def test_df_idxmax():
     ],
 )
 def test_df_align(join, axis):
-    # TODO(b/340876936): fix type error
-    index1 = pandas.Index([1, 2, 3, 4], dtype="Int64")  # type: ignore
-    # TODO(b/340876936): fix type error
-    index2 = pandas.Index([1, 2, 4, 5], dtype="Int64")  # type: ignore
+
+    index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
+
+    index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
     pd_df1 = pandas.DataFrame(
         {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
     )
@@ -1911,10 +1894,11 @@ def test_df_align(join, axis):
     pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)
 
     # Don't check dtype as pandas does unnecessary float conversion
-    # TODO(b/340876936): fix type error
-    pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)  # type: ignore
-    # TODO(b/340876936): fix type error
-    pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)  # type: ignore
+    assert isinstance(bf_result1, dataframe.DataFrame) and isinstance(
+        bf_result2, dataframe.DataFrame
+    )
+    pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
+    pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)
 
 
 def test_combine_first(
@@ -2568,11 +2552,15 @@ def test_df_transpose():
     # Include some floats to ensure type coercion
     values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]]
     # Test complex case of both axes being multi-indices with non-unique elements
-    # TODO(b/340876936): fix type error
-    columns = pd.Index(["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow"))  # type: ignore
+
+    columns: pandas.Index = pd.Index(
+        ["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")
+    )
     columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"])
-    # TODO(b/340876936): fix type error
-    index = pd.Index(["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow"))  # type: ignore
+
+    index: pandas.Index = pd.Index(
+        ["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")
+    )
     rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"])
 
     pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi)
@@ -3742,10 +3730,9 @@ def test_df_setattr_index():
         [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
     )
     bf_df = dataframe.DataFrame(pd_df)
-    # TODO(b/340876936): fix type error
-    pd_df.index = [4, 5]  # type: ignore
-    # TODO(b/340876936): fix type error
-    bf_df.index = [4, 5]  # type: ignore
+
+    pd_df.index = typing.cast(pandas.Index, [4, 5])
+    bf_df.index = [4, 5]
 
     assert_pandas_df_equal(
         pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
@@ -3757,10 +3744,10 @@ def test_df_setattr_columns():
         [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
     )
     bf_df = dataframe.DataFrame(pd_df)
-    # TODO(b/340876936): fix type error
-    pd_df.columns = [4, 5, 6]  # type: ignore
-    # TODO(b/340876936): fix type error
-    bf_df.columns = [4, 5, 6]  # type: ignore
+
+    pd_df.columns = typing.cast(pandas.Index, [4, 5, 6])
+
+    bf_df.columns = typing.cast(pandas.Index, [4, 5, 6])
 
     assert_pandas_df_equal(
         pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
@@ -3854,8 +3841,8 @@ def test_iloc_list_multiindex(scalars_dfs):
 
 
 def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index):
-    # TODO(b/340876936): fix type error
-    index_list = []  # type: ignore
+
+    index_list: List[int] = []
 
     bf_result = scalars_df_index.iloc[index_list]
     pd_result = scalars_pandas_df_index.iloc[index_list]

From 1d1ad612608d14cf5c3dbbc43d5ee73c9b628970 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Mon, 10 Jun 2024 19:08:15 +0000
Subject: [PATCH 2/2] chore: replace list casts with pandas.Index in setattr tests

---
 tests/system/small/test_dataframe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 3e50485fc8..d5854bd8d0 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3731,7 +3731,7 @@ def test_df_setattr_index():
     )
     bf_df = dataframe.DataFrame(pd_df)
 
-    pd_df.index = typing.cast(pandas.Index, [4, 5])
+    pd_df.index = pandas.Index([4, 5])
     bf_df.index = [4, 5]
 
     assert_pandas_df_equal(
@@ -3745,9 +3745,9 @@ def test_df_setattr_columns():
     )
     bf_df = dataframe.DataFrame(pd_df)
 
-    pd_df.columns = typing.cast(pandas.Index, [4, 5, 6])
+    pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6]))
 
-    bf_df.columns = typing.cast(pandas.Index, [4, 5, 6])
+    bf_df.columns = pandas.Index([4, 5, 6])
 
     assert_pandas_df_equal(
         pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False