From 14a2dae5a55ae329ad764aad1c568b7320197257 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Sat, 8 Jun 2024 20:01:41 +0000 Subject: [PATCH 1/2] chore: cleanup type errors in tests/system/small/test_dataframe.py --- tests/system/small/test_dataframe.py | 117 ++++++++++++--------------- 1 file changed, 52 insertions(+), 65 deletions(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index ffc09a1a1f..3e50485fc8 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -17,7 +17,7 @@ import sys import tempfile import typing -from typing import Tuple +from typing import Dict, List, Tuple import geopandas as gpd # type: ignore import numpy as np @@ -146,9 +146,9 @@ def test_df_construct_inline_respects_location(): with bpd.option_context("bigquery.location", "europe-west1"): df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]]) repr(df) + assert df.query_job is not None + table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) - # TODO(b/340876936): fix type error - table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) # type: ignore assert table.location == "europe-west1" @@ -753,10 +753,9 @@ def test_assign_listlike_to_empty_df(session): def test_assign_to_empty_df_multiindex_error(session): empty_df = dataframe.DataFrame(session=session) empty_pandas_df = pd.DataFrame() - # TODO(b/340876936): fix type error - empty_df["empty_col_1"] = [] # type: ignore - # TODO(b/340876936): fix type error - empty_df["empty_col_2"] = [] # type: ignore + + empty_df["empty_col_1"] = typing.cast(series.Series, []) + empty_df["empty_col_2"] = typing.cast(series.Series, []) empty_pandas_df["empty_col_1"] = [] empty_pandas_df["empty_col_2"] = [] empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"]) @@ -1340,40 +1339,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how): def test_get_dtypes(scalars_df_default_index): dtypes = scalars_df_default_index.dtypes + 
dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = { + "bool_col": pd.BooleanDtype(), + "bytes_col": pd.ArrowDtype(pa.binary()), + "date_col": pd.ArrowDtype(pa.date32()), + "datetime_col": pd.ArrowDtype(pa.timestamp("us")), + "geography_col": gpd.array.GeometryDtype(), + "int64_col": pd.Int64Dtype(), + "int64_too": pd.Int64Dtype(), + "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)), + "float64_col": pd.Float64Dtype(), + "rowindex": pd.Int64Dtype(), + "rowindex_2": pd.Int64Dtype(), + "string_col": pd.StringDtype(storage="pyarrow"), + "time_col": pd.ArrowDtype(pa.time64("us")), + "timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")), + } pd.testing.assert_series_equal( dtypes, - pd.Series( - { - # TODO(b/340876936): fix type error - "bool_col": pd.BooleanDtype(), # type: ignore - # TODO(b/340876936): fix type error - "bytes_col": pd.ArrowDtype(pa.binary()), # type: ignore - # TODO(b/340876936): fix type error - "date_col": pd.ArrowDtype(pa.date32()), # type: ignore - # TODO(b/340876936): fix type error - "datetime_col": pd.ArrowDtype(pa.timestamp("us")), # type: ignore - # TODO(b/340876936): fix type error - "geography_col": gpd.array.GeometryDtype(), # type: ignore - # TODO(b/340876936): fix type error - "int64_col": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "int64_too": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)), # type: ignore - # TODO(b/340876936): fix type error - "float64_col": pd.Float64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "rowindex": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "rowindex_2": pd.Int64Dtype(), # type: ignore - # TODO(b/340876936): fix type error - "string_col": pd.StringDtype(storage="pyarrow"), # type: ignore - # TODO(b/340876936): fix type error - "time_col": pd.ArrowDtype(pa.time64("us")), # type: ignore - # TODO(b/340876936): fix type error - "timestamp_col": 
pd.ArrowDtype(pa.timestamp("us", tz="UTC")), # type: ignore - } - ), + pd.Series(dtypes_dict), ) @@ -1828,10 +1812,9 @@ def test_df_update(overwrite, filter_func): if pd.__version__.startswith("1."): pytest.skip("dtype handled differently in pandas 1.x.") - # TODO(b/340876936): fix type error - index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore - # TODO(b/340876936): fix type error - index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore + index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") + + index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") pd_df1 = pandas.DataFrame( {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 ) @@ -1891,10 +1874,10 @@ def test_df_idxmax(): ], ) def test_df_align(join, axis): - # TODO(b/340876936): fix type error - index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore - # TODO(b/340876936): fix type error - index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore + + index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64") + + index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64") pd_df1 = pandas.DataFrame( {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 ) @@ -1911,10 +1894,11 @@ def test_df_align(join, axis): pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis) # Don't check dtype as pandas does unnecessary float conversion - # TODO(b/340876936): fix type error - pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) # type: ignore - # TODO(b/340876936): fix type error - pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) # type: ignore + assert isinstance(bf_result1, dataframe.DataFrame) and isinstance( + bf_result2, dataframe.DataFrame + ) + pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) + pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) def 
test_combine_first( @@ -2568,11 +2552,15 @@ def test_df_transpose(): # Include some floats to ensure type coercion values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]] # Test complex case of both axes being multi-indices with non-unique elements - # TODO(b/340876936): fix type error - columns = pd.Index(["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore + + columns: pandas.Index = pd.Index( + ["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow") + ) columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"]) - # TODO(b/340876936): fix type error - index = pd.Index(["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore + + index: pandas.Index = pd.Index( + ["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow") + ) rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"]) pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi) @@ -3742,10 +3730,9 @@ def test_df_setattr_index(): [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"] ) bf_df = dataframe.DataFrame(pd_df) - # TODO(b/340876936): fix type error - pd_df.index = [4, 5] # type: ignore - # TODO(b/340876936): fix type error - bf_df.index = [4, 5] # type: ignore + + pd_df.index = typing.cast(pandas.Index, [4, 5]) + bf_df.index = [4, 5] assert_pandas_df_equal( pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False @@ -3757,10 +3744,10 @@ def test_df_setattr_columns(): [[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"] ) bf_df = dataframe.DataFrame(pd_df) - # TODO(b/340876936): fix type error - pd_df.columns = [4, 5, 6] # type: ignore - # TODO(b/340876936): fix type error - bf_df.columns = [4, 5, 6] # type: ignore + + pd_df.columns = typing.cast(pandas.Index, [4, 5, 6]) + + bf_df.columns = typing.cast(pandas.Index, [4, 5, 6]) assert_pandas_df_equal( pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False @@ -3854,8 +3841,8 @@ def 
test_iloc_list_multiindex(scalars_dfs): def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index): - # TODO(b/340876936): fix type error - index_list = [] # type: ignore + + index_list: List[int] = [] bf_result = scalars_df_index.iloc[index_list] pd_result = scalars_pandas_df_index.iloc[index_list] From 1d1ad612608d14cf5c3dbbc43d5ee73c9b628970 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Mon, 10 Jun 2024 19:08:15 +0000 Subject: [PATCH 2/2] chore: construct pandas.Index explicitly in setattr tests instead of casting lists --- tests/system/small/test_dataframe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 3e50485fc8..d5854bd8d0 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3731,7 +3731,7 @@ def test_df_setattr_index(): ) bf_df = dataframe.DataFrame(pd_df) - pd_df.index = typing.cast(pandas.Index, [4, 5]) + pd_df.index = pandas.Index([4, 5]) bf_df.index = [4, 5] assert_pandas_df_equal( @@ -3745,9 +3745,9 @@ def test_df_setattr_columns(): ) bf_df = dataframe.DataFrame(pd_df) - pd_df.columns = typing.cast(pandas.Index, [4, 5, 6]) + pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6])) - bf_df.columns = typing.cast(pandas.Index, [4, 5, 6]) + bf_df.columns = pandas.Index([4, 5, 6]) assert_pandas_df_equal( pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False