From 0f5c1d6957e31f96dec79d4477682a903b6c22fb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 13 May 2014 21:49:23 +0200 Subject: [PATCH 1/3] SQL: add release notes for refactor (GH6292) --- doc/source/release.rst | 4 +++ doc/source/v0.14.0.txt | 67 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/doc/source/release.rst b/doc/source/release.rst index 739d8ba46ec4e..01468e35a037c 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -66,6 +66,8 @@ New features in pandas the actual range of dates that you can use is 1678 AD to 2262 AD. (:issue:`4041`) - Added error bar support to the ``.plot`` method of ``DataFrame`` and ``Series`` (:issue:`3796`, :issue:`6834`) - Implemented ``Panel.pct_change`` (:issue:`6904`) +- The SQL reading and writing functions now support more database flavors + through SQLAlchemy (:issue:`2717`, :issue:`4163`, :issue:`5950`, :issue:`6292`). API Changes ~~~~~~~~~~~ @@ -257,6 +259,8 @@ Deprecations - The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). +- The following ``io.sql`` functions have been deprecated: ``tquery``, ``uquery``, ``read_frame``, ``frame_query``, ``write_frame``. + - The `percentile_width` keyword argument in :meth:`~DataFrame.describe` has been deprecated. Use the `percentiles` keyword instead, which takes a list of percentiles to display. The default output is unchanged. diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 69da6d52d21ff..5f9a2711114c2 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -340,6 +340,71 @@ More consistent behaviour for some groupby methods: SQL ~~~ +The SQL reading and writing functions now support more database flavors +through SQLAlchemy (:issue:`2717`, :issue:`4163`, :issue:`5950`, :issue:`6292`). 
+All databases supported by SQLAlchemy can be used, such +as PostgreSQL, MySQL, Oracle, Microsoft SQL server (see documentation of +SQLAlchemy on `included dialects +`_). + +The functionality of providing DBAPI connection objects will only be supported +for sqlite3 in the future. The ``'mysql'`` flavor is deprecated. + +The new functions :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table` +are introduced. The function :func:`~pandas.read_sql` is kept as a convenience +wrapper around the other two and will delegate to the specific function depending on +the provided input (database table name or sql query). + +In practice, you have to provide a SQLAlchemy ``engine`` to the sql functions. +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. For an in-memory sqlite database: + +.. ipython:: python + + from sqlalchemy import create_engine + # Create your connection. + engine = create_engine('sqlite:///:memory:') + +This ``engine`` can then be used to write or read data to/from this database: + +.. ipython:: python + + df = pd.DataFrame({'A': [1,2,3], 'B': ['a', 'b', 'c']}) + df.to_sql('db_table', engine, index=False) + +You can read data from a database by specifying the table name: + +.. ipython:: python + + pd.read_sql_table('db_table', engine) + +or by specifying a sql query: + +.. ipython:: python + + pd.read_sql_query('SELECT * FROM db_table', engine) + +Some other enhancements to the sql functions include: + +- support for writing the index. This can be controlled with the ``index`` + keyword (default is True). +- specify the column label to use when writing the index with ``index_label``. +- specify string columns to parse as datetimes with the ``parse_dates`` + keyword in :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table`. + +.. 
warning:: + + Some of the existing functions or function aliases have been deprecated + and will be removed in future versions. This includes: ``tquery``, ``uquery``, + ``read_frame``, ``frame_query``, ``write_frame``. + +.. warning:: + + The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. + MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). + + .. _whatsnew_0140.slicers: MultiIndexing Using Slicers @@ -573,6 +638,8 @@ Deprecations - The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). + - The following ``io.sql`` functions have been deprecated: ``tquery``, ``uquery``, ``read_frame``, ``frame_query``, ``write_frame``. + - The `percentile_width` keyword argument in :meth:`~DataFrame.describe` has been deprecated. Use the `percentiles` keyword instead, which takes a list of percentiles to display. The default output is unchanged. From 03126053aa07cbdf4a437df65b79d85832f47854 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 15 May 2014 22:34:43 +0200 Subject: [PATCH 2/3] DOC: update sql docs - minor edits - remove mention of object PandasSQLWithEngine, as this API is not yet finalized, so not yet meant for public use --- doc/source/io.rst | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 9fdf26172cab2..8b4e450ef80c7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3160,7 +3160,8 @@ your database. .. versionadded:: 0.14.0 If SQLAlchemy is not installed, a fallback is only provided for sqlite (and -for mysql for backwards compatibility, but this is deprecated). +for mysql for backwards compatibility, but this is deprecated and will be +removed in a future version). This mode requires a Python database adapter which respect the `Python DB-API `__. @@ -3190,14 +3191,12 @@ engine. 
You can use a temporary SQLite database where data are stored in To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine object from database URI. You only need to create the engine once per database you are connecting to. - For more information on :func:`create_engine` and the URI formatting, see the examples below and the SQLAlchemy `documentation `__ .. ipython:: python from sqlalchemy import create_engine - from pandas.io import sql # Create your connection. engine = create_engine('sqlite:///:memory:') @@ -3280,8 +3279,6 @@ to pass to :func:`pandas.to_datetime`: You can check if a table exists using :func:`~pandas.io.sql.has_table` -In addition, the class :class:`~pandas.io.sql.PandasSQLWithEngine` can be -instantiated directly for more manual control over the SQL interaction. Querying ~~~~~~~~ @@ -3310,18 +3307,18 @@ variant appropriate for your database. .. code-block:: python + from pandas.io import sql sql.execute('SELECT * FROM table_name', engine) - sql.execute('INSERT INTO table_name VALUES(?, ?, ?)', engine, params=[('id', 1, 12.2, True)]) -In addition, the class :class:`~pandas.io.sql.PandasSQLWithEngine` can be -instantiated directly for more manual control over the SQL interaction. - - Engine connection examples ~~~~~~~~~~~~~~~~~~~~~~~~~~ +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. + .. code-block:: python from sqlalchemy import create_engine @@ -3341,6 +3338,8 @@ Engine connection examples # or absolute, starting with a slash: engine = create_engine('sqlite:////absolute/path/to/foo.db') +For more information see the examples in the SQLAlchemy `documentation `__ + Sqlite fallback ~~~~~~~~~~~~~~~ @@ -3354,16 +3353,14 @@ You can create connections like so: .. 
code-block:: python import sqlite3 - from pandas.io import sql - cnx = sqlite3.connect(':memory:') + con = sqlite3.connect(':memory:') And then issue the following queries: .. code-block:: python - data.to_sql('data', cnx) - - sql.read_sql("SELECT * FROM data", cnx) + data.to_sql('data', con) + pd.read_sql_query("SELECT * FROM data", con) .. _io.bigquery: From ad97d2740201f6b7687dfccebd85cc348e8bfbe3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 16 May 2014 11:25:39 +0200 Subject: [PATCH 3/3] SQL: update some signatures and docstrings: - remove meta kwarg from read_sql_table (see discussion in #6300) - remove flavor kwarg from read_sql (not necessary + not there in 0.13, so would have been API change) - update docstring of to_sql in generic with latest changes - enhance docstring of get_schema --- pandas/core/generic.py | 5 ++-- pandas/io/sql.py | 49 +++++++++++++++++++------------------ pandas/io/tests/test_sql.py | 2 +- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 04ab4fb14d512..0e5ca6afdb56b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -928,10 +928,11 @@ def to_sql(self, name, con, flavor='sqlite', if_exists='fail', index=True, con : SQLAlchemy engine or DBAPI2 connection (legacy mode) Using SQLAlchemy makes it possible to use any DB supported by that library. - If a DBAPI2 object is given, a supported SQL flavor must also be provided + If a DBAPI2 object, only sqlite3 is supported. flavor : {'sqlite', 'mysql'}, default 'sqlite' The flavor of SQL to use. Ignored when using SQLAlchemy engine. - Required when using DBAPI2 connection. + 'mysql' is deprecated and will be removed in future versions, but it + will be further supported through SQLAlchemy engines. if_exists : {'fail', 'replace', 'append'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. 
diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 347370eaee92f..aa08c95c4f1c3 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -221,8 +221,8 @@ def uquery(sql, con=None, cur=None, retry=True, params=None): #------------------------------------------------------------------------------ #--- Read and write to DataFrames -def read_sql_table(table_name, con, meta=None, index_col=None, - coerce_float=True, parse_dates=None, columns=None): +def read_sql_table(table_name, con, index_col=None, coerce_float=True, + parse_dates=None, columns=None): """Read SQL database table into a DataFrame. Given a table name and an SQLAlchemy engine, returns a DataFrame. @@ -234,8 +234,6 @@ def read_sql_table(table_name, con, meta=None, index_col=None, Name of SQL table in database con : SQLAlchemy engine Sqlite DBAPI conncection mode not supported - meta : SQLAlchemy meta, optional - If omitted MetaData is reflected from engine index_col : string, optional Column to set as index coerce_float : boolean, default True @@ -264,7 +262,7 @@ def read_sql_table(table_name, con, meta=None, index_col=None, """ - pandas_sql = PandasSQLAlchemy(con, meta=meta) + pandas_sql = PandasSQLAlchemy(con) table = pandas_sql.read_table( table_name, index_col=index_col, coerce_float=coerce_float, parse_dates=parse_dates, columns=columns) @@ -292,11 +290,10 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, library. If a DBAPI2 object, only sqlite3 is supported. index_col : string, optional - column name to use for the returned DataFrame object. + Column name to use as index for the returned DataFrame object. coerce_float : boolean, default True Attempt to convert values to non-string, non-numeric objects (like decimal.Decimal) to floating point, useful for SQL result sets - cur : depreciated, cursor is obtained from connection params : list, tuple or dict, optional List of parameters to pass to execute method. 
parse_dates : list or dict @@ -325,8 +322,8 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=parse_dates) -def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, - params=None, parse_dates=None, columns=None): +def read_sql(sql, con, index_col=None, coerce_float=True, params=None, + parse_dates=None, columns=None): """ Read SQL query or database table into a DataFrame. @@ -339,15 +336,10 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, library. If a DBAPI2 object, only sqlite3 is supported. index_col : string, optional - column name to use for the returned DataFrame object. - flavor : string, {'sqlite', 'mysql'} - The flavor of SQL to use. Ignored when using - SQLAlchemy engine. Required when using DBAPI2 connection. - 'mysql' is still supported, but will be removed in future versions. + Column name to use as index for the returned DataFrame object. coerce_float : boolean, default True Attempt to convert values to non-string, non-numeric objects (like decimal.Decimal) to floating point, useful for SQL result sets - cur : depreciated, cursor is obtained from connection params : list, tuple or dict, optional List of parameters to pass to execute method. parse_dates : list or dict @@ -360,7 +352,8 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, Especially useful with databases without native Datetime support, such as SQLite columns : list - List of column names to select from sql table + List of column names to select from sql table (only used when reading + a table). 
Returns ------- @@ -379,7 +372,7 @@ def read_sql(sql, con, index_col=None, flavor='sqlite', coerce_float=True, read_sql_query : Read SQL query into a DataFrame """ - pandas_sql = pandasSQL_builder(con, flavor=flavor) + pandas_sql = pandasSQL_builder(con) if 'select' in sql.lower(): try: @@ -419,8 +412,8 @@ def to_sql(frame, name, con, flavor='sqlite', if_exists='fail', index=True, If a DBAPI2 object, only sqlite3 is supported. flavor : {'sqlite', 'mysql'}, default 'sqlite' The flavor of SQL to use. Ignored when using SQLAlchemy engine. - Required when using DBAPI2 connection. - 'mysql' is still supported, but will be removed in future versions. + 'mysql' is deprecated and will be removed in future versions, but it + will be further supported through SQLAlchemy engines. if_exists : {'fail', 'replace', 'append'}, default 'fail' - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. @@ -461,8 +454,8 @@ def has_table(table_name, con, flavor='sqlite'): If a DBAPI2 object, only sqlite3 is supported. flavor: {'sqlite', 'mysql'}, default 'sqlite' The flavor of SQL to use. Ignored when using SQLAlchemy engine. - Required when using DBAPI2 connection. - 'mysql' is still supported, but will be removed in future versions. + 'mysql' is deprecated and will be removed in future versions, but it + will be further supported through SQLAlchemy engines. Returns ------- @@ -1090,15 +1083,23 @@ def _create_sql_schema(self, frame, table_name): def get_schema(frame, name, flavor='sqlite', keys=None, con=None): """ - Get the SQL db table schema for the given frame + Get the SQL db table schema for the given frame. Parameters ---------- frame : DataFrame - name : name of SQL table + name : string + name of SQL table flavor : {'sqlite', 'mysql'}, default 'sqlite' - keys : columns to use a primary key + The flavor of SQL to use. Ignored when using SQLAlchemy engine. 
+ 'mysql' is deprecated and will be removed in future versions, but it + will be further supported through SQLAlchemy engines. + keys : string or sequence + columns to use as a primary key con: an open SQL database connection object or an SQLAlchemy engine + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. """ diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 6058c0923e3c1..a47feceb7f233 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -340,7 +340,7 @@ def test_read_sql_iris(self): def test_legacy_read_frame(self): with tm.assert_produces_warning(FutureWarning): iris_frame = sql.read_frame( - "SELECT * FROM iris", self.conn, flavor='sqlite') + "SELECT * FROM iris", self.conn) self._check_iris_loaded_frame(iris_frame) def test_to_sql(self):