Skip to content

Commit d11274c

Browse files
authored
chore: cleanup type errors in tests/system/small/test_dataframe.py (#771)
* chore: cleanup type errors in tests/system/small/test_dataframe.py
* Some updates
1 parent 11af385 commit d11274c

File tree

1 file changed

+52
-65
lines changed

1 file changed

+52
-65
lines changed

tests/system/small/test_dataframe.py

Lines changed: 52 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
import sys
1818
import tempfile
1919
import typing
20-
from typing import Tuple
20+
from typing import Dict, List, Tuple
2121

2222
import geopandas as gpd # type: ignore
2323
import numpy as np
@@ -146,9 +146,9 @@ def test_df_construct_inline_respects_location():
146146
with bpd.option_context("bigquery.location", "europe-west1"):
147147
df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
148148
repr(df)
149+
assert df.query_job is not None
150+
table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)
149151

150-
# TODO(b/340876936): fix type error
151-
table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) # type: ignore
152152
assert table.location == "europe-west1"
153153

154154

@@ -753,10 +753,9 @@ def test_assign_listlike_to_empty_df(session):
753753
def test_assign_to_empty_df_multiindex_error(session):
754754
empty_df = dataframe.DataFrame(session=session)
755755
empty_pandas_df = pd.DataFrame()
756-
# TODO(b/340876936): fix type error
757-
empty_df["empty_col_1"] = [] # type: ignore
758-
# TODO(b/340876936): fix type error
759-
empty_df["empty_col_2"] = [] # type: ignore
756+
757+
empty_df["empty_col_1"] = typing.cast(series.Series, [])
758+
empty_df["empty_col_2"] = typing.cast(series.Series, [])
760759
empty_pandas_df["empty_col_1"] = []
761760
empty_pandas_df["empty_col_2"] = []
762761
empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"])
@@ -1340,40 +1339,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
13401339

13411340
def test_get_dtypes(scalars_df_default_index):
13421341
dtypes = scalars_df_default_index.dtypes
1342+
dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
1343+
"bool_col": pd.BooleanDtype(),
1344+
"bytes_col": pd.ArrowDtype(pa.binary()),
1345+
"date_col": pd.ArrowDtype(pa.date32()),
1346+
"datetime_col": pd.ArrowDtype(pa.timestamp("us")),
1347+
"geography_col": gpd.array.GeometryDtype(),
1348+
"int64_col": pd.Int64Dtype(),
1349+
"int64_too": pd.Int64Dtype(),
1350+
"numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)),
1351+
"float64_col": pd.Float64Dtype(),
1352+
"rowindex": pd.Int64Dtype(),
1353+
"rowindex_2": pd.Int64Dtype(),
1354+
"string_col": pd.StringDtype(storage="pyarrow"),
1355+
"time_col": pd.ArrowDtype(pa.time64("us")),
1356+
"timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
1357+
}
13431358
pd.testing.assert_series_equal(
13441359
dtypes,
1345-
pd.Series(
1346-
{
1347-
# TODO(b/340876936): fix type error
1348-
"bool_col": pd.BooleanDtype(), # type: ignore
1349-
# TODO(b/340876936): fix type error
1350-
"bytes_col": pd.ArrowDtype(pa.binary()), # type: ignore
1351-
# TODO(b/340876936): fix type error
1352-
"date_col": pd.ArrowDtype(pa.date32()), # type: ignore
1353-
# TODO(b/340876936): fix type error
1354-
"datetime_col": pd.ArrowDtype(pa.timestamp("us")), # type: ignore
1355-
# TODO(b/340876936): fix type error
1356-
"geography_col": gpd.array.GeometryDtype(), # type: ignore
1357-
# TODO(b/340876936): fix type error
1358-
"int64_col": pd.Int64Dtype(), # type: ignore
1359-
# TODO(b/340876936): fix type error
1360-
"int64_too": pd.Int64Dtype(), # type: ignore
1361-
# TODO(b/340876936): fix type error
1362-
"numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)), # type: ignore
1363-
# TODO(b/340876936): fix type error
1364-
"float64_col": pd.Float64Dtype(), # type: ignore
1365-
# TODO(b/340876936): fix type error
1366-
"rowindex": pd.Int64Dtype(), # type: ignore
1367-
# TODO(b/340876936): fix type error
1368-
"rowindex_2": pd.Int64Dtype(), # type: ignore
1369-
# TODO(b/340876936): fix type error
1370-
"string_col": pd.StringDtype(storage="pyarrow"), # type: ignore
1371-
# TODO(b/340876936): fix type error
1372-
"time_col": pd.ArrowDtype(pa.time64("us")), # type: ignore
1373-
# TODO(b/340876936): fix type error
1374-
"timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")), # type: ignore
1375-
}
1376-
),
1360+
pd.Series(dtypes_dict),
13771361
)
13781362

13791363

@@ -1828,10 +1812,9 @@ def test_df_update(overwrite, filter_func):
18281812
if pd.__version__.startswith("1."):
18291813
pytest.skip("dtype handled differently in pandas 1.x.")
18301814

1831-
# TODO(b/340876936): fix type error
1832-
index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore
1833-
# TODO(b/340876936): fix type error
1834-
index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore
1815+
index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
1816+
1817+
index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
18351818
pd_df1 = pandas.DataFrame(
18361819
{"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
18371820
)
@@ -1891,10 +1874,10 @@ def test_df_idxmax():
18911874
],
18921875
)
18931876
def test_df_align(join, axis):
1894-
# TODO(b/340876936): fix type error
1895-
index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore
1896-
# TODO(b/340876936): fix type error
1897-
index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore
1877+
1878+
index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")
1879+
1880+
index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
18981881
pd_df1 = pandas.DataFrame(
18991882
{"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
19001883
)
@@ -1911,10 +1894,11 @@ def test_df_align(join, axis):
19111894
pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)
19121895

19131896
# Don't check dtype as pandas does unnecessary float conversion
1914-
# TODO(b/340876936): fix type error
1915-
pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) # type: ignore
1916-
# TODO(b/340876936): fix type error
1917-
pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) # type: ignore
1897+
assert isinstance(bf_result1, dataframe.DataFrame) and isinstance(
1898+
bf_result2, dataframe.DataFrame
1899+
)
1900+
pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
1901+
pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)
19181902

19191903

19201904
def test_combine_first(
@@ -2568,11 +2552,15 @@ def test_df_transpose():
25682552
# Include some floats to ensure type coercion
25692553
values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]]
25702554
# Test complex case of both axes being multi-indices with non-unique elements
2571-
# TODO(b/340876936): fix type error
2572-
columns = pd.Index(["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore
2555+
2556+
columns: pandas.Index = pd.Index(
2557+
["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")
2558+
)
25732559
columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"])
2574-
# TODO(b/340876936): fix type error
2575-
index = pd.Index(["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore
2560+
2561+
index: pandas.Index = pd.Index(
2562+
["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")
2563+
)
25762564
rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"])
25772565

25782566
pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi)
@@ -3742,10 +3730,9 @@ def test_df_setattr_index():
37423730
[[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
37433731
)
37443732
bf_df = dataframe.DataFrame(pd_df)
3745-
# TODO(b/340876936): fix type error
3746-
pd_df.index = [4, 5] # type: ignore
3747-
# TODO(b/340876936): fix type error
3748-
bf_df.index = [4, 5] # type: ignore
3733+
3734+
pd_df.index = pandas.Index([4, 5])
3735+
bf_df.index = [4, 5]
37493736

37503737
assert_pandas_df_equal(
37513738
pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
@@ -3757,10 +3744,10 @@ def test_df_setattr_columns():
37573744
[[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
37583745
)
37593746
bf_df = dataframe.DataFrame(pd_df)
3760-
# TODO(b/340876936): fix type error
3761-
pd_df.columns = [4, 5, 6] # type: ignore
3762-
# TODO(b/340876936): fix type error
3763-
bf_df.columns = [4, 5, 6] # type: ignore
3747+
3748+
pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6]))
3749+
3750+
bf_df.columns = pandas.Index([4, 5, 6])
37643751

37653752
assert_pandas_df_equal(
37663753
pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
@@ -3854,8 +3841,8 @@ def test_iloc_list_multiindex(scalars_dfs):
38543841

38553842

38563843
def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index):
3857-
# TODO(b/340876936): fix type error
3858-
index_list = [] # type: ignore
3844+
3845+
index_list: List[int] = []
38593846

38603847
bf_result = scalars_df_index.iloc[index_list]
38613848
pd_result = scalars_pandas_df_index.iloc[index_list]

0 commit comments

Comments
 (0)