Skip to content

Commit ca2e608

Browse files
committed
API/FIX: wrap result of cut/qcut in Series and make it ordered
As per the discussion in pandas-dev#8077, the result of cut/qcut should be wrapped into a Series when the input is a Series, now that this is possible. Also change the returned Categorical to "ordered=True"; this was overlooked when the ordered flag was introduced in Categorical. Closes: pandas-dev#8077
1 parent aba7d9c commit ca2e608

File tree

4 files changed

+44
-14
lines changed

4 files changed

+44
-14
lines changed

pandas/tests/test_categorical.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,8 +1058,8 @@ def test_assignment_to_dataframe(self):
10581058
labels = [ "{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500) ]
10591059

10601060
df = df.sort(columns=['value'], ascending=True)
1061-
d = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
1062-
s = Series(d)
1061+
s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
1062+
d = s.values
10631063
df['D'] = d
10641064
str(df)
10651065

@@ -1081,7 +1081,7 @@ def test_assignment_to_dataframe(self):
10811081

10821082
# sorting
10831083
s.name = 'E'
1084-
self.assertTrue(result2.sort_index().equals(s))
1084+
self.assertTrue(result2.sort_index().equals(s.sort_index()))
10851085

10861086
cat = pd.Categorical([1,2,3,10], levels=[1,2,3,4,10])
10871087
df = pd.DataFrame(pd.Series(cat))

pandas/tests/test_groupby.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3313,7 +3313,8 @@ def test_groupby_categorical_unequal_len(self):
33133313
import pandas as pd
33143314
#GH3011
33153315
series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4])
3316-
bins = pd.cut(series.dropna(), 4)
3316+
# The ValueError is only raised with a Categorical, not with a Series of type category
3317+
bins = pd.cut(series.dropna().values, 4)
33173318

33183319
# len(bins) != len(series) here
33193320
self.assertRaises(ValueError,lambda : series.groupby(bins).mean())

pandas/tools/tests/test_tile.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def test_inf_handling(self):
123123
ex_levels = ['(-inf, 2]', '(2, 4]', '(4, inf]']
124124

125125
np.testing.assert_array_equal(result.levels, ex_levels)
126-
np.testing.assert_array_equal(result_ser.levels, ex_levels)
126+
np.testing.assert_array_equal(result_ser.cat.levels, ex_levels)
127127
self.assertEqual(result[5], '(4, inf]')
128128
self.assertEqual(result[0], '(-inf, 2]')
129129
self.assertEqual(result_ser[5], '(4, inf]')
@@ -230,6 +230,25 @@ def test_qcut_binning_issues(self):
230230
self.assertTrue(ep < en)
231231
self.assertTrue(ep <= sn)
232232

233+
def test_cut_return_categorical(self):
234+
from pandas import Categorical
235+
s = Series([0,1,2,3,4,5,6,7,8])
236+
res = cut(s,3)
237+
exp = Series(Categorical.from_codes([0,0,0,1,1,1,2,2,2],
238+
["(-0.008, 2.667]", "(2.667, 5.333]", "(5.333, 8]"],
239+
ordered=True))
240+
tm.assert_series_equal(res, exp)
241+
242+
def test_qcut_return_categorical(self):
243+
from pandas import Categorical
244+
s = Series([0,1,2,3,4,5,6,7,8])
245+
res = qcut(s,[0,0.333,0.666,1])
246+
exp = Series(Categorical.from_codes([0,0,0,1,1,1,2,2,2],
247+
["[0, 2.664]", "(2.664, 5.328]", "(5.328, 8]"],
248+
ordered=True))
249+
tm.assert_series_equal(res, exp)
250+
251+
233252

234253
def curpath():
235254
pth, _ = os.path.split(os.path.abspath(__file__))

pandas/tools/tile.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
3434
right == True (the default), then the bins [1,2,3,4] indicate
3535
(1,2], (2,3], (3,4].
3636
labels : array or boolean, default None
37-
Labels to use for bin edges, or False to return integer bin labels
37+
Labels to use for bins, or False to return integer bin labels.
3838
retbins : bool, optional
3939
Whether to return the bins or not. Can be useful if bins is given
4040
as a scalar.
@@ -45,7 +45,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
4545
4646
Returns
4747
-------
48-
out : Categorical or array of integers if labels is False
48+
out : Categorical or Series or array of integers if labels is False
49+
The return type (Categorical or Series) depends on the input: a Series of type category if
50+
input is a Series else Categorical.
4951
bins : ndarray of floats
5052
Returned only if `retbins` is True.
5153
@@ -102,9 +104,12 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
102104
if (np.diff(bins) < 0).any():
103105
raise ValueError('bins must increase monotonically.')
104106

105-
return _bins_to_cuts(x, bins, right=right, labels=labels,
106-
retbins=retbins, precision=precision,
107-
include_lowest=include_lowest)
107+
res = _bins_to_cuts(x, bins, right=right, labels=labels,retbins=retbins, precision=precision,
108+
include_lowest=include_lowest)
109+
if isinstance(x, Series):
110+
res = Series(res, index=x.index)
111+
return res
112+
108113

109114

110115
def qcut(x, q, labels=None, retbins=False, precision=3):
@@ -130,7 +135,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
130135
131136
Returns
132137
-------
133-
cat : Categorical
138+
cat : Categorical or Series
139+
Returns a Series of type category if input is a Series else Categorical.
134140
135141
Notes
136142
-----
@@ -144,8 +150,12 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
144150
else:
145151
quantiles = q
146152
bins = algos.quantile(x, quantiles)
147-
return _bins_to_cuts(x, bins, labels=labels, retbins=retbins,
148-
precision=precision, include_lowest=True)
153+
res = _bins_to_cuts(x, bins, labels=labels, retbins=retbins,precision=precision,
154+
include_lowest=True)
155+
if isinstance(x, Series):
156+
res = Series(res, index=x.index)
157+
return res
158+
149159

150160

151161
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
@@ -189,7 +199,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
189199

190200
levels = np.asarray(levels, dtype=object)
191201
np.putmask(ids, na_mask, 0)
192-
fac = Categorical(ids - 1, levels, name=name, fastpath=True)
202+
fac = Categorical(ids - 1, levels, ordered=True, name=name, fastpath=True)
193203
else:
194204
fac = ids - 1
195205
if has_nas:

0 commit comments

Comments
 (0)