diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6d20907373014..604ee47c8fee7 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -632,9 +632,9 @@ def str_split(arr, pat=None, n=None, return_type='series'): pat : string, default None String or regular expression to split on. If None, splits on whitespace n : int, default None (all) - return_type : {'series', 'index', 'frame'}, default 'series' - If frame, returns a DataFrame (elements are strings) - If series or index, returns the same type as the original object + return_type : {'series', 'index', 'frame', 'same', 'expand'}, default 'series' + If frame or expand, returns a DataFrame (elements are strings) + If series, index or same, returns the same type as the original object (elements are lists of strings). Notes @@ -649,9 +649,9 @@ def str_split(arr, pat=None, n=None, return_type='series'): from pandas.core.frame import DataFrame from pandas.core.index import Index - if return_type not in ('series', 'index', 'frame'): - raise ValueError("return_type must be {'series', 'index', 'frame'}") - if return_type == 'frame' and isinstance(arr, Index): + if return_type not in ('series', 'index', 'frame', 'same', 'expand'): + raise ValueError("return_type must be {'series', 'index', 'frame', 'same', 'expand'}") + if return_type in ('frame', 'expand') and isinstance(arr, Index): raise ValueError("return_type='frame' is not supported for string " "methods on Index") if pat is None: @@ -668,7 +668,7 @@ def str_split(arr, pat=None, n=None, return_type='series'): n = 0 regex = re.compile(pat) f = lambda x: regex.split(x, maxsplit=n) - if return_type == 'frame': + if return_type in ('frame', 'expand'): res = DataFrame((Series(x) for x in _na_map(f, arr)), index=arr.index) else: res = _na_map(f, arr) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index bb75b12754dca..7267924baf023 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1220,8 +1220,12 @@ def test_str_attribute(self): tm.assert_index_equal(idx.str.split(return_type='series'), expected) # return_type 'index' is an alias for 'series' tm.assert_index_equal(idx.str.split(return_type='index'), expected) + # return_type 'same' is an alias for 'series' and 'index' + tm.assert_index_equal(idx.str.split(return_type='same'), expected) with self.assertRaisesRegexp(ValueError, 'not supported'): idx.str.split(return_type='frame') + with self.assertRaisesRegexp(ValueError, 'not supported'): + idx.str.split(return_type='expand') # test boolean case, should return np.array instead of boolean Index idx = Index(['a1', 'a2', 'b1', 'b2'])