From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/10] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") 
week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 
13a29308370a4bd1411b4c4c3a0054bf01ad9b1e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 11:13:51 +0100 Subject: [PATCH 02/10] Add suffixes argument for pd.concat --- pandas/core/reshape/concat.py | 113 +++++++++++++++++++++ pandas/tests/reshape/test_concat.py | 151 ++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3efe8072d3323..3b6ca6a69f53c 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,6 +2,9 @@ concat routines """ +from collections import Counter +from functools import partial +from itertools import chain import warnings import numpy as np @@ -21,6 +24,7 @@ ) import pandas.core.indexes.base as ibase from pandas.core.internals import concatenate_block_managers +from pandas.core.internals.managers import _transform_index # --------------------------------------------------------------------- # Concatenate DataFrame objects @@ -37,6 +41,7 @@ def concat( names=None, verify_integrity: bool = False, sort=None, + suffixes=None, copy: bool = True, ): """ @@ -94,6 +99,15 @@ def concat( .. versionadded:: 0.23.0 + suffixes: tuple of str, default None + Suffix to apply to overlapping column names for each concatenated object + respectively. If None and there is overlapping column names after concat, + DataFrame with duplicated names will be outputted along with a warning + message. If the length of suffixes does not match with number of + concatenated objects, an error will raise. + + This has no effect if there is no overlapping column names or if axis=0. + copy : bool, default True If False, do not copy data unnecessarily. @@ -238,6 +252,16 @@ def concat( Traceback (most recent call last): ... ValueError: Indexes have overlapping values: ['a'] + + If objects have overlapping column names when passing in ``axis=1``, + specifying suffixes using tuple can add suffix to each object respectively.
+ + >>> df7 = pd.DataFrame({"a": [1, 2]}) + >>> df8 = pd.DataFrame({"a": [3, 4], "b": [4, 6]}) + >>> pd.concat([df7, df8], axis=1, suffixes=("_x", "_y")) + a_x a_y b + 0 1 3 4 + 1 2 4 6 """ op = _Concatenator( objs, @@ -251,6 +275,7 @@ def concat( verify_integrity=verify_integrity, copy=copy, sort=sort, + suffixes=suffixes, ) return op.get_result() @@ -274,6 +299,7 @@ def __init__( verify_integrity: bool = False, copy: bool = True, sort=False, + suffixes=None, ): if isinstance(objs, (NDFrame, str)): raise TypeError( @@ -418,6 +444,16 @@ def __init__( self.names = names or getattr(keys, "names", None) self.levels = levels self.sort = sort + self.suffixes = suffixes + + if self.axis == 0 and not self._is_series: + + # If objs is not composed of pure Series, and if BlockManager axis is 1, + # then will check the overlapping of columns, and directly rename them + # if overlapping is the case + self.objs = self._items_overlap_with_suffix( + self.objs, suffixes=self.suffixes + ) self.ignore_index = ignore_index self.verify_integrity = verify_integrity @@ -447,6 +483,10 @@ def get_result(self): index, columns = self.new_axes df = cons(data, index=index) + + # before assigning columns to composed DataFrame, check if columns + # are overlapped + columns = self._items_overlap_with_suffix(columns, self.suffixes) df.columns = columns return df.__finalize__(self, method="concat") @@ -589,6 +629,79 @@ def _maybe_check_integrity(self, concat_index: Index): "{overlap!s}".format(overlap=overlap) ) + def _items_overlap_with_suffix(self, objs, suffixes): + """ + Adding suffix for items if there is overlapping situation. + + Be aware that `objs` can be either DataFrame-like or Index-like given + if `self._is_series` is True or False. + + Since default is None, therefore, if overlapping is found and suffixes + is default None, will raise a warning and return the objs as is. 
+ """ + if self._is_series: + + # when _is_series is True, objs are actually column Index + overlap_cols = [obj for obj in objs] + else: + overlap_cols = chain.from_iterable([obj.columns for obj in objs]) + to_rename = [col for col, cnt in Counter(overlap_cols).items() if cnt > 1] + + if len(to_rename) == 0: + return objs + + if suffixes is None: + + # this is to keep current behavior unchanged for users, so just raise + # a warning instead of error + warnings.warn( + "There will have duplicated columns after concatenation," + "you could avoid it by setting suffixes." + ) + return objs + + if not isinstance(suffixes, tuple): + raise ValueError( + "Invalid type {t} is assigned to suffixes, only" + " is allowed.".format(t=type(suffixes)) + ) + + if len(objs) != len(suffixes): + raise ValueError( + "Number of objects for concatenation is not" + "equal to number of suffixes" + ) + + def renamer(x, suffix): + """ + Rename the indices. + + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. 
+ + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name + """ + if x in to_rename and suffix is not None: + return "{x}{suffix}".format(x=x, suffix=suffix) + return x + + if self._is_series: + new_cols = [renamer(obj, suffix) for obj, suffix in zip(objs, suffixes)] + return new_cols + + for obj, suffix in zip(objs, suffixes): + col_renamer = partial(renamer, suffix=suffix) + obj.columns = _transform_index(obj.columns, col_renamer) + + return objs + def _concat_indexes(indexes) -> Index: return indexes[0].append(indexes[1:]) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 323b3126c2461..19e9146ba944d 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2776,3 +2776,154 @@ def test_concat_datetimeindex_freq(): expected = pd.DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) expected.index.freq = None tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "obj1, obj2", + [ + (pd.Series([1, 2, 3], name="a"), pd.Series([2, 3, 4], name="a")), + (pd.Series([1, 2, 3], name="a"), pd.DataFrame({"a": [2, 3, 4]})), + (pd.DataFrame({"a": [1, 2, 3]}), pd.DataFrame({"a": [2, 3, 4]})), + ], +) +def test_concat_suffixes_warning(obj1, obj2): + # GH 21791, add test for warning when suffix is None and columns overlap + with catch_warnings(record=True): + output = pd.concat([obj1, obj2], axis=1) + + tm.assert_series_equal(output.iloc[:, 0], pd.Series([1, 2, 3], name="a")) + tm.assert_series_equal(output.iloc[:, 1], pd.Series([2, 3, 4], name="a")) + + +@pytest.mark.parametrize("suffixes", ["_a", ("_x"), ["a", "b"]]) +def test_concat_suffixes_type(suffixes): + # GH 21791, like pd.merge, here suffixes type should be tuple + objs = [pd.Series([1, 2], name="a"), pd.DataFrame({"a": [2, 3]})] + with pytest.raises(ValueError, match="only is allowed"): + pd.concat(objs, axis=1, suffixes=suffixes) + + 
+@pytest.mark.parametrize( + "objs, suffixes", + [ + ( + [ + pd.Series([1, 2], name="a"), + pd.Series([2, 3], name="a"), + pd.Series([2, 3]), + ], + ("_x", "_y"), + ), + ( + [ + pd.DataFrame({"a": [1, 2]}), + pd.DataFrame({"a": [2, 3]}, pd.Series([1, 2])), + ], + ("_x", "_y", "_z", "_k"), + ), + ( + [pd.DataFrame({"a": [1, 2]}), pd.DataFrame({"a": [2, 3]})], + ("_x", "_y", "_z"), + ), + ], +) +def test_concat_suffixes_length_unmatch_error(objs, suffixes): + # GH 21791, test that an error is raised when columns overlap but length of + # suffixes does not match the length of objs + with pytest.raises(ValueError, match="Number of objects for concatenation is not"): + pd.concat(objs, axis=1, suffixes=suffixes) + + +@pytest.mark.parametrize( + "objs, suffixes, expected", + [ + ( + [pd.Series([1, 2], name="a"), pd.Series([2, 3], name="a")], + ("_x", "_y"), + pd.DataFrame({"a_x": [1, 2], "a_y": [2, 3]}), + ), + ( + [ + pd.Series([1, 2]), + pd.Series([2, 3], name="b"), + pd.Series([3, 4], name="b"), + ], + ("_x", "_y", "_z"), + pd.DataFrame({0: [1, 2], "b_y": [2, 3], "b_z": [3, 4]}), + ), + ( + [ + pd.Series([1, 2], name="a"), + pd.Series([2, 3], name="b"), + pd.Series([3, 4], name="b"), + pd.Series([3, 5], name="a"), + ], + ("_x", "_y", "_z", "_k"), + pd.DataFrame({"a_x": [1, 2], "b_y": [2, 3], "b_z": [3, 4], "a_k": [3, 5]}), + ), + ], +) +def test_concat_suffixes_series(objs, suffixes, expected): + # GH 21791, test if suffixes is assigned correctly when objs are all Series + output = pd.concat(objs, axis=1, suffixes=suffixes) + tm.assert_frame_equal(output, expected) + + +@pytest.mark.parametrize( + "objs, suffixes, expected", + [ + ( + [pd.DataFrame({"a": [1, 2]}), pd.DataFrame({"a": [2, 3], "b": [3, 4]})], + ("_x", "_y"), + pd.DataFrame({"a_x": [1, 2], "a_y": [2, 3], "b": [3, 4]}), + ), + ( + [ + pd.DataFrame({"a": [1, 2], "b": [2, 3]}), + pd.DataFrame({"a": [2, 3]}), + pd.DataFrame({"a": [3, 4], "b": [4, 5], "c": [5, 6]}), + ], + ("_x", "_y", "_z"), +
pd.DataFrame( + { + "a_x": [1, 2], + "b_x": [2, 3], + "a_y": [2, 3], + "a_z": [3, 4], + "b_z": [4, 5], + "c": [5, 6], + } + ), + ), + ], +) +def test_concat_suffixes_dataframes(objs, suffixes, expected): + # GH 21791, test if suffixes is assigned correctly when objs are all DataFrames + output = pd.concat(objs, axis=1, suffixes=suffixes) + tm.assert_frame_equal(output, expected) + + +@pytest.mark.parametrize( + "objs, suffixes, expected", + [ + ( + [pd.Series([1, 2], name="a"), pd.DataFrame({"a": [2, 3], "b": [2, 5]})], + ("_x", "_y"), + pd.DataFrame({"a_x": [1, 2], "a_y": [2, 3], "b": [2, 5]}), + ), + ( + [ + pd.Series([1, 2], name="a"), + pd.DataFrame({"a": [2, 3], "b": [2, 5]}), + pd.Series([3, 4], name="b"), + ], + ("_x", "_y", "_z"), + pd.DataFrame({"a_x": [1, 2], "a_y": [2, 3], "b_y": [2, 5], "b_z": [3, 4]}), + ), + ], +) +def test_concat_suffixes_mixed_series_dataframe(objs, suffixes, expected): + # GH 21791, test if suffixes is assigned correctly when objs are mixed Series and + # DataFrames + output = pd.concat(objs, axis=1, suffixes=suffixes) + tm.assert_frame_equal(output, expected) From 901a21b47f38cbb3ab993a1f0181442ff4de4576 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 11:22:27 +0100 Subject: [PATCH 03/10] Add whatsnew note --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 896ae91c68642..74550b97df6d2 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -114,6 +114,7 @@ Other enhancements - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) - :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`) +- Added ``suffixes`` argument to :meth:`pandas.concat` to distinguish overlapping column names after concatenation (:issue:`21791`) 
Build Changes ^^^^^^^^^^^^^ From fd64695006565146acbf7562adb7d19eb6ed7b57 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 11:48:20 +0100 Subject: [PATCH 04/10] fix linting --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 3b6ca6a69f53c..7db929305a524 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -642,7 +642,7 @@ def _items_overlap_with_suffix(self, objs, suffixes): if self._is_series: # when _is_series is True, objs are actually column Index - overlap_cols = [obj for obj in objs] + overlap_cols = list(objs) else: overlap_cols = chain.from_iterable([obj.columns for obj in objs]) to_rename = [col for col, cnt in Counter(overlap_cols).items() if cnt > 1] From b7e97d6943070affe26a669007dcb8d0500a21cf Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 11:49:58 +0100 Subject: [PATCH 05/10] fix linting --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 7db929305a524..76999b7f851e0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -41,7 +41,7 @@ def concat( names=None, verify_integrity: bool = False, sort=None, - suffixes=None, + suffixes: tuple = None, copy: bool = True, ): """ From fd53b09b4d94f13c477305d7475a4b59c19a7f8c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 12:13:13 +0100 Subject: [PATCH 06/10] fix linting --- pandas/core/reshape/concat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f72a606e86956..8145fe1be19d0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -41,7 +41,7 @@ def concat( names=None, verify_integrity: bool = False, sort=None, - suffixes: tuple = None, + suffixes=None, copy: bool = True, ): 
""" @@ -99,7 +99,7 @@ def concat( .. versionadded:: 0.23.0 - suffixes: tuple of str, default None + suffixes : tuple of str, default None Suffix to apply to overlapping column names for each concatenated object respectively. If None and there is overlapping column names after concat, DataFrame with duplicated names will be outputted along with a warning From 1252f7b7008ced0f2aed4a53ff1241b5a0af5e04 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 12:28:57 +0100 Subject: [PATCH 07/10] fix tests --- pandas/core/reshape/concat.py | 19 ++----------------- pandas/tests/reshape/test_concat.py | 17 ----------------- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 8145fe1be19d0..f73d60aca95af 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -101,9 +101,7 @@ def concat( suffixes : tuple of str, default None Suffix to apply to overlapping column names for each concatenated object - respectively. If None and there is overlapping column names after concat, - DataFrame with duplicated names will be outputted along with a warning - message. If the length of suffixes does not match with number of + respectively. If the length of suffixes does not match with number of concatenated objects, an error will raise. This has no effect if there is no overlapping column names or if axis=0. @@ -631,9 +629,6 @@ def _items_overlap_with_suffix(self, objs, suffixes): Be aware that `objs` can be either DataFrame-like or Index-like given if `self._is_series` is True or False. - - Since default is None, therefore, if overlapping is found and suffixes - is default None, will raise a warning and return the objs as is. 
""" if self._is_series: @@ -643,17 +638,7 @@ def _items_overlap_with_suffix(self, objs, suffixes): overlap_cols = chain.from_iterable([obj.columns for obj in objs]) to_rename = [col for col, cnt in Counter(overlap_cols).items() if cnt > 1] - if len(to_rename) == 0: - return objs - - if suffixes is None: - - # this is to keep current behavior unchanged for users, so just raise - # a warning instead of error - warnings.warn( - "There will have duplicated columns after concatenation," - "you could avoid it by setting suffixes." - ) + if len(to_rename) == 0 or suffixes is None: return objs if not isinstance(suffixes, tuple): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 19e9146ba944d..54ad2b253e4e6 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2778,23 +2778,6 @@ def test_concat_datetimeindex_freq(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "obj1, obj2", - [ - (pd.Series([1, 2, 3], name="a"), pd.Series([2, 3, 4], name="a")), - (pd.Series([1, 2, 3], name="a"), pd.DataFrame({"a": [2, 3, 4]})), - (pd.DataFrame({"a": [1, 2, 3]}), pd.DataFrame({"a": [2, 3, 4]})), - ], -) -def test_concat_suffixes_warning(obj1, obj2): - # GH 21791, add test for warning when suffix is None and columns overlap - with catch_warnings(record=True): - output = pd.concat([obj1, obj2], axis=1) - - tm.assert_series_equal(output.iloc[:, 0], pd.Series([1, 2, 3], name="a")) - tm.assert_series_equal(output.iloc[:, 1], pd.Series([2, 3, 4], name="a")) - - @pytest.mark.parametrize("suffixes", ["_a", ("_x"), ["a", "b"]]) def test_concat_suffixes_type(suffixes): # GH 21791, like pd.merge, here suffixes type should be tuple From 233adee1994d6a5b766f1006bf38ccff4b09f879 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 12:29:54 +0100 Subject: [PATCH 08/10] add docstring --- pandas/core/reshape/concat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f73d60aca95af..234bb0ee16332 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -102,7 +102,8 @@ def concat( suffixes : tuple of str, default None Suffix to apply to overlapping column names for each concatenated object respectively. If the length of suffixes does not match with number of - concatenated objects, an error will raise. + concatenated objects, an error will be raised. If None, the output will remain + as is with duplicated column names. This has no effect if there is no overlapping column names or if axis=0. From f7d3d591d49d4d81cc2e4e37a662effad6d1a329 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 16:59:12 +0100 Subject: [PATCH 09/10] code change on comments --- pandas/core/reshape/concat.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 234bb0ee16332..93784f1902194 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -5,6 +5,7 @@ from collections import Counter from functools import partial from itertools import chain +from typing import Optional import warnings import numpy as np @@ -41,7 +42,7 @@ def concat( names=None, verify_integrity: bool = False, sort=None, - suffixes=None, + suffixes: Optional[tuple] = None, copy: bool = True, ): """ @@ -644,8 +645,8 @@ def _items_overlap_with_suffix(self, objs, suffixes): if not isinstance(suffixes, tuple): raise ValueError( - "Invalid type {t} is assigned to suffixes, only" - " is allowed.".format(t=type(suffixes)) + f"Invalid type {type(suffixes)} is assigned to suffixes, only" + f"'tuple' is allowed."
) if len(objs) != len(suffixes): @@ -671,7 +672,7 @@ def renamer(x, suffix): x : renamed column name """ if x in to_rename and suffix is not None: - return "{x}{suffix}".format(x=x, suffix=suffix) + return f"{x}{suffix}" return x if self._is_series: From c8570707ca7a39285bb690e540778ba2805e76f4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 17 Nov 2019 17:44:02 +0100 Subject: [PATCH 10/10] fix error message --- pandas/core/reshape/concat.py | 2 +- pandas/tests/reshape/test_concat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 93784f1902194..d6627870ab7d7 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -645,7 +645,7 @@ def _items_overlap_with_suffix(self, objs, suffixes): if not isinstance(suffixes, tuple): raise ValueError( - f"Invalid type {type(suffixes)} is assigned to suffixes, only" + f"Invalid type {type(suffixes)} is assigned to suffixes, only " f"'tuple' is allowed." ) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 54ad2b253e4e6..4f90511de48f5 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2782,7 +2782,7 @@ def test_concat_datetimeindex_freq(): def test_concat_suffixes_type(suffixes): # GH 21791, like pd.merge, here suffixes type should be tuple objs = [pd.Series([1, 2], name="a"), pd.DataFrame({"a": [2, 3]})] - with pytest.raises(ValueError, match="only is allowed"): + with pytest.raises(ValueError, match="only 'tuple' is allowed"): pd.concat(objs, axis=1, suffixes=suffixes)