diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 70d616ca72c1b..c679f53270ed1 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -137,11 +137,15 @@ Other enhancements - ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`) - ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). + - ``pd.read_stata`` will now read Stata 118 type files. (:issue:`9882`) - ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). - ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`). + +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). + - ``drop_duplicates`` and ``duplicated`` now accept ``keep`` keyword to target first, last, and all duplicates. ``take_last`` keyword is deprecated, see :ref:`deprecations <whatsnew_0170.deprecations>` (:issue:`6511`, :issue:`8505`) .. ipython :: python @@ -151,6 +155,35 @@ Other enhancements s.drop_duplicates(keep='last') s.drop_duplicates(keep=False) +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). + +- ``concat`` will now inherit the existing series names (even when some are missing), if new ones are not provided through the ``keys`` argument (:issue:`10698`). + + Previous Behavior: + + .. code-block:: python + + In [1]: foo = pd.Series([1,2], name='foo') + In [2]: bar = pd.Series([1,2]) + In [3]: baz = pd.Series([4,5]) + In [4]: pd.concat([foo, bar, baz], 1) + Out[4]: + 0 1 2 + 0 1 1 4 + 1 2 2 5 + + New Behavior: + + .. ipython:: python + + foo = pd.Series([1,2], name='foo') + bar = pd.Series([1,2]) + baz = pd.Series([4,5]) + pd.concat([foo, bar, baz], 1) + +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). + +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). .. 
_whatsnew_0170.api: @@ -529,7 +562,6 @@ Deprecations ===================== ================================= - ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`). -- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was removed in favor of ``keep``. (:issue:`6511`, :issue:`8505`) .. _whatsnew_0170.prior_deprecations: @@ -616,9 +648,6 @@ Bug Fixes - Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`) -- Bug in ``Categorical`` may not representing properly when category contains ``tz`` or ``Period`` (:issue:`10713`) -- Bug in ``Categorical.__iter__`` may not returning correct ``datetime`` and ``Period`` (:issue:`10713`) - - Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because of the website url is changed (:issue:`10591`). - Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 8eefe4ba98876..b587ec128c016 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -337,7 +337,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None, from sqlalchemy.schema import MetaData meta = MetaData(con, schema=schema) try: - meta.reflect(only=[table_name]) + meta.reflect(only=[table_name], views=True) except sqlalchemy.exc.InvalidRequestError: raise ValueError("Table %s not found" % table_name) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 859c6d3250121..c78d193124b76 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -161,6 +161,12 @@ SELECT * FROM iris WHERE "Name"=%(name)s AND "SepalLength"=%(length)s """ + }, + 'create_view': { + 'sqlite': """ + CREATE VIEW iris_view AS + SELECT * FROM iris; + """ } } @@ -244,6 +250,10 @@ def _load_iris_data(self): for row in r: self._get_exec().execute(ins, row) + def _load_iris_view(self): + 
self.drop_table('iris_view') + self._get_exec().execute(SQL_STRINGS['create_view'][self.flavor]) + def _check_iris_loaded_frame(self, iris_frame): pytype = iris_frame.dtypes[0].type row = iris_frame.iloc[0] @@ -482,6 +492,7 @@ class _TestSQLApi(PandasSQLTest): def setUp(self): self.conn = self.connect() self._load_iris_data() + self._load_iris_view() self._load_test1_data() self._load_test2_data() self._load_test3_data() @@ -492,6 +503,11 @@ def test_read_sql_iris(self): "SELECT * FROM iris", self.conn) self._check_iris_loaded_frame(iris_frame) + def test_read_sql_view(self): + iris_frame = sql.read_sql_query( + "SELECT * FROM iris_view", self.conn) + self._check_iris_loaded_frame(iris_frame) + def test_legacy_read_frame(self): with tm.assert_produces_warning(FutureWarning): iris_frame = sql.read_frame( diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 430828a3db31b..d04cc8c4a7754 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -16,7 +16,7 @@ from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution -from pandas.core.common import ABCSeries +from pandas.core.common import ABCSeries, isnull from pandas.io.parsers import TextFileReader import pandas.core.common as com @@ -896,8 +896,14 @@ def get_result(self): data = dict(zip(range(len(self.objs)), self.objs)) index, columns = self.new_axes tmpdf = DataFrame(data, index=index) - if columns is not None: - tmpdf.columns = columns + # checks if the 'columns' variable already stores valid column names (because they were set via the 'keys' argument + # in the 'concat' function call). 
If that's not the case, use the series names as column names + if columns.equals(Index(np.arange(len(self.objs)))): + columns = np.array([ data[i].name for i in range(len(data)) ], dtype='object') + indexer = isnull(columns) + if indexer.any(): + columns[indexer] = np.arange(len(indexer[indexer])) + tmpdf.columns = columns return tmpdf.__finalize__(self, method='concat') # combine block managers diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 8b1457e7fd490..3be283eff1bb4 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1797,6 +1797,15 @@ def test_concat_dataframe_keys_bug(self): self.assertEqual(list(result.columns), [('t1', 'value'), ('t2', 'value')]) + def test_concat_series_partial_columns_names(self): + foo = pd.Series([1,2], name='foo') + bar = pd.Series([1,2]) + baz = pd.Series([4,5]) + + result = pd.concat([foo, bar, baz], 1) + expected = DataFrame({'foo' : [1,2], 0 : [1,2], 1 : [4,5]}, columns=['foo',0,1]) + tm.assert_frame_equal(result, expected) + def test_concat_dict(self): frames = {'foo': DataFrame(np.random.randn(4, 3)), 'bar': DataFrame(np.random.randn(4, 3)), @@ -2330,7 +2339,7 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal(result.columns, lrange(2))) + self.assertTrue(np.array_equal(result.columns, Index(['A', 0], dtype='object'))) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A') @@ -2431,7 +2440,7 @@ def test_concat_series_axis1_same_names_ignore_index(self): s2 = Series(randn(len(dates)), index=dates, name='value') result = concat([s1, s2], axis=1, ignore_index=True) - self.assertTrue(np.array_equal(result.columns, [0, 1])) + self.assertTrue(np.array_equal(result.columns, ['value', 'value'])) def test_concat_iterables(self): from collections import deque, Iterable