
ENH: Add dtype argument to read_sql_query (GH10285) #37546


Merged
merged 12 commits on Dec 23, 2020
Changes from 7 commits
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
@@ -307,6 +307,7 @@ Other enhancements
- Improve numerical stability for :meth:`.Rolling.skew`, :meth:`.Rolling.kurt`, :meth:`Expanding.skew` and :meth:`Expanding.kurt` through implementation of Kahan summation (:issue:`6929`)
- Improved error reporting for subsetting columns of a :class:`.DataFrameGroupBy` with ``axis=1`` (:issue:`37725`)
- Implement method ``cross`` for :meth:`DataFrame.merge` and :meth:`DataFrame.join` (:issue:`5401`)
- :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`)
Contributor

move to 1.3

Contributor Author

Done

- When :func:`read_csv/sas/json` are called with ``chunksize``/``iterator`` they can be used in a ``with`` statement as they return context-managers (:issue:`38225`)

.. ---------------------------------------------------------------------------
41 changes: 37 additions & 4 deletions pandas/io/sql.py
@@ -7,12 +7,13 @@
from datetime import date, datetime, time
from functools import partial
import re
from typing import Iterator, List, Optional, Union, overload
from typing import Dict, Iterator, List, Optional, Union, overload
import warnings

import numpy as np

import pandas._libs.lib as lib
from pandas._typing import Dtype

from pandas.core.dtypes.common import is_datetime64tz_dtype, is_dict_like, is_list_like
from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -119,10 +120,15 @@ def _parse_date_columns(data_frame, parse_dates):
return data_frame


def _wrap_result(data, columns, index_col=None, coerce_float=True, parse_dates=None):
def _wrap_result(
data, columns, index_col=None, coerce_float=True, parse_dates=None, dtype=None
):
"""Wrap result set of query in a DataFrame."""
frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float)

if dtype:
frame = frame.astype(dtype)

frame = _parse_date_columns(frame, parse_dates)

if index_col is not None:
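The new branch above defers entirely to DataFrame.astype, which accepts either a single dtype for every column or a per-column mapping; a minimal standalone sketch of that behavior (data values are illustrative only, not from this diff):

import numpy as np
import pandas as pd

# A frame as DataFrame.from_records would hand it back: plain float64 columns.
frame = pd.DataFrame({"SepalLength": [5.1, 4.9], "SepalWidth": [3.0, 2.0]})

# One dtype for every column ...
print(frame.astype(np.float32).dtypes)
# ... or a per-column mapping, which is exactly what the new ``dtype`` argument forwards.
print(frame.astype({"SepalLength": np.float32, "SepalWidth": "Int64"}).dtypes)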
@@ -295,6 +301,7 @@ def read_sql_query(
params=None,
parse_dates=None,
chunksize: None = None,
dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
Contributor

this type actually is pretty useful, can you define it in _typing, call it DtypeOrDictDtype / DtypeTable and add a comment about it. cc @simonjayhawkins @WillAyd @jorisvandenbossche for the name here.

Member

Suggested change
dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
dtype: Optional[Union[Dtype, Dict[Label, Dtype]]] = None,

Maybe DtypeArg?

Contributor Author

Done, added DtypeArg to _typing
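For reference, a sketch of what such an alias and the resulting signature could look like, assuming the Dtype and Label aliases already in pandas._typing; the exact definition merged into pandas may differ:

from typing import Dict, Union

from pandas._typing import Dtype, Label

# Hypothetical alias along the lines discussed in this thread:
# either a single dtype or a mapping of column label -> dtype.
DtypeArg = Union[Dtype, Dict[Label, Dtype]]

# Each overload above would then read: dtype: Optional[DtypeArg] = None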

) -> DataFrame:
...

@@ -308,6 +315,7 @@ def read_sql_query(
params=None,
parse_dates=None,
chunksize: int = 1,
dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
) -> Iterator[DataFrame]:
...

@@ -320,6 +328,7 @@ def read_sql_query(
params=None,
parse_dates=None,
chunksize: Optional[int] = None,
dtype: Optional[Union[Dtype, Dict[str, Dtype]]] = None,
) -> Union[DataFrame, Iterator[DataFrame]]:
"""
Read SQL query into a DataFrame.
@@ -358,6 +367,9 @@ def read_sql_query(
chunksize : int, default None
If specified, return an iterator where `chunksize` is the number of
rows to include in each chunk.
dtype : Type name or dict of columns
Data type for data or columns. E.g. np.float64 or
{'a': np.float64, 'b': np.int32, 'c': 'Int64'}
Contributor

need a versionadded 1.3 here. ok to add in next PR

Contributor Author

Sorry, I see I didn't commit that change. But I will indeed add it to the follow-on.


Returns
-------
@@ -381,6 +393,7 @@ def read_sql_query(
coerce_float=coerce_float,
parse_dates=parse_dates,
chunksize=chunksize,
dtype=dtype,
)
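End to end, the public signature wired up in this hunk can be exercised as below; a hedged sketch against an in-memory SQLite table (table name and values are illustrative, not part of this PR):

import sqlite3

import pandas as pd

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE iris (SepalLength REAL, SepalWidth REAL)")
conn.executemany("INSERT INTO iris VALUES (?, ?)", [(5.1, 3.0), (4.9, 2.0)])

# Without dtype both columns come back as float64; with the new argument they are
# cast on the way in, per column here, or with a single dtype for all columns.
df = pd.read_sql_query(
    "SELECT SepalLength, SepalWidth FROM iris",
    conn,
    dtype={"SepalLength": "float32", "SepalWidth": "Int64"},
)
print(df.dtypes)  # SepalLength -> float32, SepalWidth -> Int64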


@@ -1225,7 +1238,13 @@ def read_table(

@staticmethod
def _query_iterator(
result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
result,
chunksize,
columns,
index_col=None,
coerce_float=True,
parse_dates=None,
dtype=None,
Contributor

can you add type annotations anywhere you are adding the new argument

Contributor Author

Done

):
"""Return generator through chunked result set"""
while True:
@@ -1239,6 +1258,7 @@ def _query_iterator(
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
)

def read_query(
@@ -1249,6 +1269,7 @@ def read_query(
parse_dates=None,
params=None,
chunksize=None,
dtype=None,
):
"""
Read SQL query into a DataFrame.
@@ -1304,6 +1325,7 @@
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
)
else:
data = result.fetchall()
@@ -1313,6 +1335,7 @@
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
)
return frame
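Because the same dtype is also forwarded into _query_iterator, the cast is applied to every chunk when chunksize is given; a short sketch under the same illustrative SQLite setup as the earlier example:

import sqlite3

import pandas as pd

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE iris (SepalLength REAL, SepalWidth REAL)")
conn.executemany("INSERT INTO iris VALUES (?, ?)", [(5.1, 3.0), (4.9, 2.0)])

# Each yielded chunk has already been cast inside _wrap_result.
for chunk in pd.read_sql_query(
    "SELECT SepalLength, SepalWidth FROM iris",
    conn,
    chunksize=1,
    dtype={"SepalLength": "float32", "SepalWidth": "Int64"},
):
    assert str(chunk["SepalLength"].dtype) == "float32"
    assert str(chunk["SepalWidth"].dtype) == "Int64"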

@@ -1712,7 +1735,13 @@ def execute(self, *args, **kwargs):

@staticmethod
def _query_iterator(
cursor, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
cursor,
chunksize,
columns,
index_col=None,
coerce_float=True,
parse_dates=None,
dtype=None,
):
"""Return generator through chunked result set"""
while True:
@@ -1729,6 +1758,7 @@ def _query_iterator(
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
)

def read_query(
@@ -1739,6 +1769,7 @@ def read_query(
params=None,
parse_dates=None,
chunksize=None,
dtype=None,
):

args = _convert_params(sql, params)
@@ -1753,6 +1784,7 @@
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
)
else:
data = self._fetchall_as_list(cursor)
@@ -1764,6 +1796,7 @@
index_col=index_col,
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
)
return frame

19 changes: 19 additions & 0 deletions pandas/tests/io/test_sql.py
@@ -857,6 +857,25 @@ def test_multiindex_roundtrip(self):
)
tm.assert_frame_equal(df, result, check_index_type=True)

@pytest.mark.parametrize(
"dtype, expected",
[
(None, [float, float]),
(int, [int, int]),
(float, [float, float]),
({"SepalLength": int, "SepalWidth": float}, [int, float]),
],
)
def test_dtype_argument(self, dtype, expected):
# GH10285 Add dtype argument to read_sql_query
result = sql.read_sql_query(
"SELECT SepalLength, SepalWidth FROM iris", self.conn, dtype=dtype
)
assert result.dtypes.to_dict() == {
Contributor

can you construct an expected frame and use tm.assert_frame_equal

"SepalLength": expected[0],
"SepalWidth": expected[1],
}
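One way the assertion could be reworked along the lines of the review comment above, as a rough sketch only (it reuses the names from the test and builds the expected frame by casting an un-typed read):

# Hypothetical restructuring of the check above, not part of this diff.
expected_frame = sql.read_sql_query(
    "SELECT SepalLength, SepalWidth FROM iris", self.conn
)
if dtype is not None:
    expected_frame = expected_frame.astype(dtype)
tm.assert_frame_equal(result, expected_frame)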

def test_integer_col_names(self):
df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace")