From 2d055fdf3249d6d2acdc72a868f9c2235f556ffd Mon Sep 17 00:00:00 2001
From: phofl
Date: Fri, 3 Jun 2022 14:06:32 +0200
Subject: [PATCH 1/3] REGR: concat not sorting columns for mixed column names

---
 doc/source/whatsnew/v1.4.3.rst                |  1 +
 pandas/core/indexes/api.py                    |  7 +++++-
 pandas/tests/reshape/concat/test_dataframe.py | 23 +++++++++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst
index 54cad82366e43..5898e51ab5f52 100644
--- a/doc/source/whatsnew/v1.4.3.rst
+++ b/doc/source/whatsnew/v1.4.3.rst
@@ -17,6 +17,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.replace` when the replacement value was explicitly ``None`` when passed in a dictionary to ``to_replace`` also casting other columns to object dtype even when there were no values to replace (:issue:`46634`)
 - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`)
 - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
+- Fixed regression in :func:`concat` not sorting columns for mixed column names (:issue:`47127`)
 - Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`)
 - Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`)
 - Fixed regression in :func:`read_csv` with ``index_col=False`` identifying first row as index names when ``header=None`` (:issue:`46955`)
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 922c344510375..ea83bd933d37b 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -10,6 +10,7 @@
 
 from pandas.core.dtypes.common import is_dtype_equal
 
+from pandas.core.algorithms import safe_sort
 from pandas.core.indexes.base import (
     Index,
     _new_Index,
@@ -154,7 +155,11 @@ def _get_combined_index(
 
     if sort:
         try:
-            index = index.sort_values()
+            index_sorted = safe_sort(index)
+            if isinstance(index, MultiIndex):
+                index = MultiIndex.from_tuples(index_sorted, names=index.names)
+            else:
+                index = Index(index_sorted, name=index.name)
         except TypeError:
             pass
 
diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py
index 01763926c6d89..1018fc2806fee 100644
--- a/pandas/tests/reshape/concat/test_dataframe.py
+++ b/pandas/tests/reshape/concat/test_dataframe.py
@@ -205,3 +205,26 @@ def test_concat_copies(self, axis, order, ignore_index):
         for arr in res._iter_column_arrays():
             for arr2 in df._iter_column_arrays():
                 assert not np.shares_memory(arr, arr2)
+
+    def test_outer_sort_columns(self):
+        # GH#47127
+        df1 = DataFrame({"A": [0], "B": [1], 0: 1})
+        df2 = DataFrame({"A": [100]})
+        result = concat([df1, df2], ignore_index=True, join="outer", sort=True)
+        expected = DataFrame({0: [1.0, np.nan], "A": [0, 100], "B": [1.0, np.nan]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_inner_sort_columns(self):
+        # GH#47127
+        df1 = DataFrame({"A": [0], "B": [1], 0: 1})
+        df2 = DataFrame({"A": [100], 0: 2})
+        result = concat([df1, df2], ignore_index=True, join="inner", sort=True)
+        expected = DataFrame({0: [1, 2], "A": [0, 100]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_sort_columns_one_df(self):
+        # GH#47127
+        df1 = DataFrame({"A": [100], 0: 2})
+        result = concat([df1], ignore_index=True, join="inner", sort=True)
+        expected = DataFrame({0: [2], "A": [100]})
+        tm.assert_frame_equal(result, expected)

From 3b814f258ebf54432d2bb61902bb9d9bb4b493a1 Mon Sep 17 00:00:00 2001
From: phofl
Date: Fri, 3 Jun 2022 15:03:19 +0200
Subject: [PATCH 2/3] Fix mypy

---
 pandas/core/indexes/api.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index ea83bd933d37b..30c330e720ed4 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
 import textwrap
+from typing import cast
+
+import numpy as np
 
 from pandas._libs import (
     NaT,
@@ -156,6 +159,7 @@ def _get_combined_index(
     if sort:
         try:
             index_sorted = safe_sort(index)
+            index_sorted = cast(np.ndarray, index_sorted)
             if isinstance(index, MultiIndex):
                 index = MultiIndex.from_tuples(index_sorted, names=index.names)
             else:

From eacc54dda4457f63d034882b5f93399ddc169d9d Mon Sep 17 00:00:00 2001
From: phofl
Date: Fri, 3 Jun 2022 15:22:01 +0200
Subject: [PATCH 3/3] Rename variable

---
 pandas/core/indexes/api.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 30c330e720ed4..1e740132e3464 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -158,12 +158,12 @@ def _get_combined_index(
 
     if sort:
         try:
-            index_sorted = safe_sort(index)
-            index_sorted = cast(np.ndarray, index_sorted)
+            array_sorted = safe_sort(index)
+            array_sorted = cast(np.ndarray, array_sorted)
             if isinstance(index, MultiIndex):
-                index = MultiIndex.from_tuples(index_sorted, names=index.names)
+                index = MultiIndex.from_tuples(array_sorted, names=index.names)
             else:
-                index = Index(index_sorted, name=index.name)
+                index = Index(array_sorted, name=index.name)
         except TypeError:
             pass
 