Skip to content

chore: cleanup type errors in tests/system/small/test_dataframe.py #771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 10, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 52 additions & 65 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import sys
import tempfile
import typing
from typing import Tuple
from typing import Dict, List, Tuple

import geopandas as gpd # type: ignore
import numpy as np
Expand Down Expand Up @@ -146,9 +146,9 @@ def test_df_construct_inline_respects_location():
with bpd.option_context("bigquery.location", "europe-west1"):
df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
repr(df)
assert df.query_job is not None
table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)

# TODO(b/340876936): fix type error
table = bpd.get_global_session().bqclient.get_table(df.query_job.destination) # type: ignore
assert table.location == "europe-west1"


Expand Down Expand Up @@ -753,10 +753,9 @@ def test_assign_listlike_to_empty_df(session):
def test_assign_to_empty_df_multiindex_error(session):
empty_df = dataframe.DataFrame(session=session)
empty_pandas_df = pd.DataFrame()
# TODO(b/340876936): fix type error
empty_df["empty_col_1"] = [] # type: ignore
# TODO(b/340876936): fix type error
empty_df["empty_col_2"] = [] # type: ignore

empty_df["empty_col_1"] = typing.cast(series.Series, [])
empty_df["empty_col_2"] = typing.cast(series.Series, [])
empty_pandas_df["empty_col_1"] = []
empty_pandas_df["empty_col_2"] = []
empty_df = empty_df.set_index(["empty_col_1", "empty_col_2"])
Expand Down Expand Up @@ -1340,40 +1339,25 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):

def test_get_dtypes(scalars_df_default_index):
dtypes = scalars_df_default_index.dtypes
dtypes_dict: Dict[str, bigframes.dtypes.Dtype] = {
"bool_col": pd.BooleanDtype(),
"bytes_col": pd.ArrowDtype(pa.binary()),
"date_col": pd.ArrowDtype(pa.date32()),
"datetime_col": pd.ArrowDtype(pa.timestamp("us")),
"geography_col": gpd.array.GeometryDtype(),
"int64_col": pd.Int64Dtype(),
"int64_too": pd.Int64Dtype(),
"numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)),
"float64_col": pd.Float64Dtype(),
"rowindex": pd.Int64Dtype(),
"rowindex_2": pd.Int64Dtype(),
"string_col": pd.StringDtype(storage="pyarrow"),
"time_col": pd.ArrowDtype(pa.time64("us")),
"timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")),
}
pd.testing.assert_series_equal(
dtypes,
pd.Series(
{
# TODO(b/340876936): fix type error
"bool_col": pd.BooleanDtype(), # type: ignore
# TODO(b/340876936): fix type error
"bytes_col": pd.ArrowDtype(pa.binary()), # type: ignore
# TODO(b/340876936): fix type error
"date_col": pd.ArrowDtype(pa.date32()), # type: ignore
# TODO(b/340876936): fix type error
"datetime_col": pd.ArrowDtype(pa.timestamp("us")), # type: ignore
# TODO(b/340876936): fix type error
"geography_col": gpd.array.GeometryDtype(), # type: ignore
# TODO(b/340876936): fix type error
"int64_col": pd.Int64Dtype(), # type: ignore
# TODO(b/340876936): fix type error
"int64_too": pd.Int64Dtype(), # type: ignore
# TODO(b/340876936): fix type error
"numeric_col": pd.ArrowDtype(pa.decimal128(38, 9)), # type: ignore
# TODO(b/340876936): fix type error
"float64_col": pd.Float64Dtype(), # type: ignore
# TODO(b/340876936): fix type error
"rowindex": pd.Int64Dtype(), # type: ignore
# TODO(b/340876936): fix type error
"rowindex_2": pd.Int64Dtype(), # type: ignore
# TODO(b/340876936): fix type error
"string_col": pd.StringDtype(storage="pyarrow"), # type: ignore
# TODO(b/340876936): fix type error
"time_col": pd.ArrowDtype(pa.time64("us")), # type: ignore
# TODO(b/340876936): fix type error
"timestamp_col": pd.ArrowDtype(pa.timestamp("us", tz="UTC")), # type: ignore
}
),
pd.Series(dtypes_dict),
)


Expand Down Expand Up @@ -1828,10 +1812,9 @@ def test_df_update(overwrite, filter_func):
if pd.__version__.startswith("1."):
pytest.skip("dtype handled differently in pandas 1.x.")

# TODO(b/340876936): fix type error
index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore
# TODO(b/340876936): fix type error
index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore
index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")

index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
pd_df1 = pandas.DataFrame(
{"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
)
Expand Down Expand Up @@ -1891,10 +1874,10 @@ def test_df_idxmax():
],
)
def test_df_align(join, axis):
# TODO(b/340876936): fix type error
index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") # type: ignore
# TODO(b/340876936): fix type error
index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") # type: ignore

index1: pandas.Index = pandas.Index([1, 2, 3, 4], dtype="Int64")

index2: pandas.Index = pandas.Index([1, 2, 4, 5], dtype="Int64")
pd_df1 = pandas.DataFrame(
{"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1
)
Expand All @@ -1911,10 +1894,11 @@ def test_df_align(join, axis):
pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis)

# Don't check dtype as pandas does unnecessary float conversion
# TODO(b/340876936): fix type error
pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) # type: ignore
# TODO(b/340876936): fix type error
pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) # type: ignore
assert isinstance(bf_result1, dataframe.DataFrame) and isinstance(
bf_result2, dataframe.DataFrame
)
pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False)
pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False)


def test_combine_first(
Expand Down Expand Up @@ -2568,11 +2552,15 @@ def test_df_transpose():
# Include some floats to ensure type coercion
values = [[0, 3.5, True], [1, 4.5, False], [2, 6.5, None]]
# Test complex case of both axes being multi-indices with non-unique elements
# TODO(b/340876936): fix type error
columns = pd.Index(["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore

columns: pandas.Index = pd.Index(
["A", "B", "A"], dtype=pd.StringDtype(storage="pyarrow")
)
columns_multi = pd.MultiIndex.from_arrays([columns, columns], names=["c1", "c2"])
# TODO(b/340876936): fix type error
index = pd.Index(["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")) # type: ignore

index: pandas.Index = pd.Index(
["b", "a", "a"], dtype=pd.StringDtype(storage="pyarrow")
)
rows_multi = pd.MultiIndex.from_arrays([index, index], names=["r1", "r2"])

pd_df = pandas.DataFrame(values, index=rows_multi, columns=columns_multi)
Expand Down Expand Up @@ -3742,10 +3730,9 @@ def test_df_setattr_index():
[[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
)
bf_df = dataframe.DataFrame(pd_df)
# TODO(b/340876936): fix type error
pd_df.index = [4, 5] # type: ignore
# TODO(b/340876936): fix type error
bf_df.index = [4, 5] # type: ignore

pd_df.index = pandas.Index([4, 5])
bf_df.index = [4, 5]

assert_pandas_df_equal(
pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
Expand All @@ -3757,10 +3744,10 @@ def test_df_setattr_columns():
[[1, 1, 1], [1, 1, 1]], columns=["index", "columns", "my_column"]
)
bf_df = dataframe.DataFrame(pd_df)
# TODO(b/340876936): fix type error
pd_df.columns = [4, 5, 6] # type: ignore
# TODO(b/340876936): fix type error
bf_df.columns = [4, 5, 6] # type: ignore

pd_df.columns = typing.cast(pandas.Index, pandas.Index([4, 5, 6]))

bf_df.columns = pandas.Index([4, 5, 6])

assert_pandas_df_equal(
pd_df, bf_df.to_pandas(), check_index_type=False, check_dtype=False
Expand Down Expand Up @@ -3854,8 +3841,8 @@ def test_iloc_list_multiindex(scalars_dfs):


def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index):
# TODO(b/340876936): fix type error
index_list = [] # type: ignore

index_list: List[int] = []

bf_result = scalars_df_index.iloc[index_list]
pd_result = scalars_pandas_df_index.iloc[index_list]
Expand Down