Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions pandas-stubs/io/parsers/readers.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ def read_csv(
| None = ...,
dtype: DtypeArg | defaultdict | None = ...,
engine: CSVEngine | None = ...,
converters: dict[int | str, Callable[[str], Any]] | None = ...,
converters: dict[int | str, Callable[[str], Any]]
| dict[int, Callable[[str], Any]]
| dict[str, Callable[[str], Any]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you try Mapping[int | str, Callable[[str], Any]] instead of the union?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

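I tried that first. But it seems Mapping is invariant in its key type and covariant only in its value type, so a dict[str, ...] is not accepted where Mapping[int | str, ...] is expected: python/typing#445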

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But Mapping may work for the Type[str] issue, since that is the value... I will try that.

I also wonder if creating a covariant TypeVar and then a Mapping[T_co, ...] will work...I will also try that.

| None = ...,
true_values: list[str] = ...,
false_values: list[str] = ...,
skipinitialspace: bool = ...,
Expand Down Expand Up @@ -111,7 +114,10 @@ def read_csv(
| None = ...,
dtype: DtypeArg | defaultdict | None = ...,
engine: CSVEngine | None = ...,
converters: dict[int | str, Callable[[str], Any]] | None = ...,
converters: dict[int | str, Callable[[str], Any]]
| dict[int, Callable[[str], Any]]
| dict[str, Callable[[str], Any]]
| None = ...,
true_values: list[str] = ...,
false_values: list[str] = ...,
skipinitialspace: bool = ...,
Expand Down Expand Up @@ -174,7 +180,10 @@ def read_csv(
| None = ...,
dtype: DtypeArg | defaultdict | None = ...,
engine: CSVEngine | None = ...,
converters: dict[int | str, Callable[[str], Any]] | None = ...,
converters: dict[int | str, Callable[[str], Any]]
| dict[int, Callable[[str], Any]]
| dict[str, Callable[[str], Any]]
| None = ...,
true_values: list[str] = ...,
false_values: list[str] = ...,
skipinitialspace: bool = ...,
Expand Down Expand Up @@ -237,7 +246,10 @@ def read_table(
| None = ...,
dtype: DtypeArg | defaultdict | None = ...,
engine: CSVEngine | None = ...,
converters: dict[int | str, Callable[[str], Any]] | None = ...,
converters: dict[int | str, Callable[[str], Any]]
| dict[int, Callable[[str], Any]]
| dict[str, Callable[[str], Any]]
| None = ...,
true_values: list[str] = ...,
false_values: list[str] = ...,
skipinitialspace: bool = ...,
Expand Down Expand Up @@ -300,7 +312,10 @@ def read_table(
| None = ...,
dtype: DtypeArg | defaultdict | None = ...,
engine: CSVEngine | None = ...,
converters: dict[int | str, Callable[[str], Any]] | None = ...,
converters: dict[int | str, Callable[[str], Any]]
| dict[int, Callable[[str], Any]]
| dict[str, Callable[[str], Any]]
| None = ...,
true_values: list[str] = ...,
false_values: list[str] = ...,
skipinitialspace: bool = ...,
Expand Down Expand Up @@ -363,7 +378,10 @@ def read_table(
| None = ...,
dtype: DtypeArg | defaultdict | None = ...,
engine: CSVEngine | None = ...,
converters: dict[int | str, Callable[[str], Any]] | None = ...,
converters: dict[int | str, Callable[[str], Any]]
| dict[int, Callable[[str], Any]]
| dict[str, Callable[[str], Any]]
| None = ...,
true_values: list[str] = ...,
false_values: list[str] = ...,
skipinitialspace: bool = ...,
Expand Down
41 changes: 41 additions & 0 deletions tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
List,
Mapping,
Tuple,
TypedDict,
TypeVar,
Union,
)
Expand Down Expand Up @@ -1278,6 +1279,46 @@ def test_read_csv() -> None:
pd.DataFrame,
)

# Allow a variety of dict types for the converters parameter
converters1 = {"A": lambda x: str, "B": lambda x: str}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Originally I wanted this dictionary to be {"A": str, "B": str}. But the type of str is Type[str], which apparently isn't compatible with Callable[[str], Any]. Any idea if this is fixable?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be an issue with using a lambda function, which is untyped. So if you did

def convert_to_str(a: object) -> str:
    return str(a)

and then

converters1 = {"A": convert_to_str, "B": convert_to_str}

that might work

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, the current version works fine because it's actually a Callable. What isn't working is passing Type[str] directly. That makes the dict a Dict[str, Type[str]]. That's what I was wondering about.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the type checkers know that str could be either a type or a callable.

check(
assert_type(pd.read_csv(path, converters=converters1), pd.DataFrame),
pd.DataFrame,
)
converters2 = {"A": lambda x: str, "B": lambda x: float}
check(
assert_type(pd.read_csv(path, converters=converters2), pd.DataFrame),
pd.DataFrame,
)
converters3 = {0: lambda x: str, 1: lambda x: str}
check(
assert_type(pd.read_csv(path, converters=converters3), pd.DataFrame),
pd.DataFrame,
)
converters4 = {0: lambda x: str, 1: lambda x: float}
check(
assert_type(pd.read_csv(path, converters=converters4), pd.DataFrame),
pd.DataFrame,
)
converters5: dict[int | str, Callable[[str], Any]] = {
0: lambda x: str,
"A": lambda x: float,
}
check(
assert_type(pd.read_csv(path, converters=converters5), pd.DataFrame),
pd.DataFrame,
)

class ReadCsvKwargs(TypedDict):
converters: dict[int, Callable[[str], Any]]

read_csv_kwargs: ReadCsvKwargs = {"converters": {0: int}}

check(
assert_type(pd.read_csv(path, **read_csv_kwargs), pd.DataFrame),
pd.DataFrame,
)


def test_groupby_series_methods() -> None:
df = pd.DataFrame({"x": [1, 2, 2, 3, 3], "y": [10, 20, 30, 40, 50]})
Expand Down