ENH: Add dtype argument to read_sql_query (GH10285) #37546
pandas/io/sql.py
@@ -7,12 +7,13 @@
 from datetime import date, datetime, time
 from functools import partial
 import re
-from typing import Iterator, List, Optional, Union, overload
+from typing import Dict, Iterator, List, Optional, Union, overload
 import warnings

 import numpy as np

 import pandas._libs.lib as lib
+from pandas._typing import Dtype

 from pandas.core.dtypes.common import is_datetime64tz_dtype, is_dict_like, is_list_like
 from pandas.core.dtypes.dtypes import DatetimeTZDtype

@@ -119,10 +120,15 @@ def _parse_date_columns(data_frame, parse_dates):
     return data_frame


-def _wrap_result(data, columns, index_col=None, coerce_float=True, parse_dates=None):
+def _wrap_result(
+    data, columns, index_col=None, coerce_float=True, parse_dates=None, dtype=None
+):
     """Wrap result set of query in a DataFrame."""
     frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float)

+    if dtype:
+        frame = frame.astype(dtype)
+
     frame = _parse_date_columns(frame, parse_dates)

     if index_col is not None:

@@ -295,6 +301,7 @@ def read_sql_query(
     params=None,
     parse_dates=None,
     chunksize: None = None,
+    dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
 ) -> DataFrame:
     ...

Review comment: this type actually is pretty useful, can you define it in _typing, call it DtypeOrDictDtype / DtypeTable and add a comment about it. cc @simonjayhawkins @WillAyd @jorisvandenbossche for the name here.

Review comment (suggested change): Maybe …

Author reply: Done, added …
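As a sketch of the alias discussed in the comments above (the name that finally landed is not captured here, so DtypeArg below is only a placeholder alongside the suggested DtypeOrDictDtype / DtypeTable):

from typing import Dict, Hashable, Union

from pandas._typing import Dtype

# Placeholder name for "a single dtype, or a mapping of column label -> dtype";
# once defined in pandas/_typing.py, the overloads here could simply take
# dtype: Optional[DtypeArg] = None instead of spelling out the Union each time.
DtypeArg = Union[Dtype, Dict[Hashable, Dtype]]
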
@@ -308,6 +315,7 @@ def read_sql_query(
     params=None,
     parse_dates=None,
     chunksize: int = 1,
+    dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
 ) -> Iterator[DataFrame]:
     ...

@@ -320,6 +328,7 @@ def read_sql_query(
     params=None,
     parse_dates=None,
     chunksize: Optional[int] = None,
+    dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
 ) -> Union[DataFrame, Iterator[DataFrame]]:
     """
     Read SQL query into a DataFrame.

@@ -358,6 +367,9 @@ def read_sql_query(
     chunksize : int, default None
         If specified, return an iterator where `chunksize` is the number of
         rows to include in each chunk.
+    dtype : Type name or dict of columns
+        Data type for data or columns. E.g. np.float64 or
+        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}

     Returns
     -------

Review comment: need a versionadded 1.3 here. ok to add in next PR.

Author reply: Sorry, I see I didn't commit that change. But I will indeed add it in the follow-on.
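To make the new keyword concrete, a small usage sketch (this assumes a pandas build that already includes this change; the query is a throwaway SQLite one-liner rather than anything from the PR):

import sqlite3

import pandas as pd

conn = sqlite3.connect(":memory:")

# A single dtype is applied to every column ...
df = pd.read_sql_query("SELECT 1 AS a, 2 AS b", conn, dtype="Int64")
print(df.dtypes)  # a: Int64, b: Int64

# ... or a dict maps column names to dtypes.
df = pd.read_sql_query(
    "SELECT 1 AS a, 2 AS b", conn, dtype={"a": "float64", "b": "Int64"}
)
print(df.dtypes)  # a: float64, b: Int64
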
@@ -381,6 +393,7 @@ def read_sql_query(
         coerce_float=coerce_float,
         parse_dates=parse_dates,
         chunksize=chunksize,
+        dtype=dtype,
     )

@@ -1225,7 +1238,13 @@ def read_table(

     @staticmethod
     def _query_iterator(
-        result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
+        result,
+        chunksize,
+        columns,
+        index_col=None,
+        coerce_float=True,
+        parse_dates=None,
+        dtype=None,
     ):
         """Return generator through chunked result set"""
         while True:

Review comment: can you add type annotations anywhere you are adding this?

Author reply: Done
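A sketch of the kind of annotation the reviewer is asking for, using the placeholder alias from the earlier discussion; the actual annotations were added in a later commit, so this is illustrative only (decorator and class context omitted).

from typing import Optional

def _query_iterator(
    result,
    chunksize: Optional[int],
    columns,
    index_col=None,
    coerce_float: bool = True,
    parse_dates=None,
    dtype: Optional["DtypeArg"] = None,  # string reference to the placeholder alias above
):
    ...
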
@@ -1239,6 +1258,7 @@ def _query_iterator(
                     index_col=index_col,
                     coerce_float=coerce_float,
                     parse_dates=parse_dates,
+                    dtype=dtype,
                 )

     def read_query(

@@ -1249,6 +1269,7 @@ def read_query(
         parse_dates=None,
         params=None,
         chunksize=None,
+        dtype=None,
     ):
         """
         Read SQL query into a DataFrame.

@@ -1304,6 +1325,7 @@ def read_query(
                 index_col=index_col,
                 coerce_float=coerce_float,
                 parse_dates=parse_dates,
+                dtype=dtype,
             )
         else:
             data = result.fetchall()

@@ -1313,6 +1335,7 @@ def read_query(
                 index_col=index_col,
                 coerce_float=coerce_float,
                 parse_dates=parse_dates,
+                dtype=dtype,
             )
             return frame

@@ -1712,7 +1735,13 @@ def execute(self, *args, **kwargs):

     @staticmethod
     def _query_iterator(
-        cursor, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
+        cursor,
+        chunksize,
+        columns,
+        index_col=None,
+        coerce_float=True,
+        parse_dates=None,
+        dtype=None,
     ):
         """Return generator through chunked result set"""
         while True:

@@ -1729,6 +1758,7 @@ def _query_iterator(
                     index_col=index_col,
                     coerce_float=coerce_float,
                     parse_dates=parse_dates,
+                    dtype=dtype,
                 )

     def read_query(

@@ -1739,6 +1769,7 @@ def read_query(
         params=None,
         parse_dates=None,
         chunksize=None,
+        dtype=None,
     ):

         args = _convert_params(sql, params)

@@ -1753,6 +1784,7 @@ def read_query(
                 index_col=index_col,
                 coerce_float=coerce_float,
                 parse_dates=parse_dates,
+                dtype=dtype,
             )
         else:
             data = self._fetchall_as_list(cursor)

@@ -1764,6 +1796,7 @@ def read_query(
                 index_col=index_col,
                 coerce_float=coerce_float,
                 parse_dates=parse_dates,
+                dtype=dtype,
             )
             return frame

pandas/tests/io/test_sql.py
|
@@ -857,6 +857,25 @@ def test_multiindex_roundtrip(self): | |
) | ||
tm.assert_frame_equal(df, result, check_index_type=True) | ||
|
||
@pytest.mark.parametrize( | ||
"dtype, expected", | ||
[ | ||
(None, [float, float]), | ||
(int, [int, int]), | ||
(float, [float, float]), | ||
({"SepalLength": int, "SepalWidth": float}, [int, float]), | ||
], | ||
) | ||
def test_dtype_argument(self, dtype, expected): | ||
# GH10285 Add dtype argument to read_sql_query | ||
result = sql.read_sql_query( | ||
"SELECT SepalLength, SepalWidth FROM iris", self.conn, dtype=dtype | ||
) | ||
assert result.dtypes.to_dict() == { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you constructed an expected frame and use tm.assert_frame_equal |
||
"SepalLength": expected[0], | ||
"SepalWidth": expected[1], | ||
} | ||
|
||
def test_integer_col_names(self): | ||
df = DataFrame([[1, 2], [3, 4]], columns=[0, 1]) | ||
sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace") | ||
|
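A standalone sketch of the pattern the reviewer suggests: build the expected frame explicitly and compare with tm.assert_frame_equal instead of only checking result.dtypes. The table and values are made up, and the snippet assumes a pandas version that includes this change; the real test runs against the iris fixture.

import sqlite3

import numpy as np
import pandas as pd
import pandas._testing as tm

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE iris (SepalLength REAL, SepalWidth REAL)")
conn.executemany("INSERT INTO iris VALUES (?, ?)", [(5.1, 3.5), (4.9, 3.0)])

dtype = {"SepalLength": np.float32, "SepalWidth": np.float64}
result = pd.read_sql_query(
    "SELECT SepalLength, SepalWidth FROM iris", conn, dtype=dtype
)

# Constructing the expected frame checks values and index as well as dtypes.
expected = pd.DataFrame(
    {"SepalLength": [5.1, 4.9], "SepalWidth": [3.5, 3.0]}
).astype(dtype)
tm.assert_frame_equal(result, expected)
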
Review comment: move to 1.3

Author reply: Done