26
26
def cut (x , bins , right = True , labels = None , retbins = False , precision = 3 ,
27
27
include_lowest = False ):
28
28
"""
29
- Return indices of half-open `bins` to which each value of `x` belongs.
29
+ Bin `x` and return data about the bin to which each `x` value belongs.
30
30
31
- Use `cut` when you need to segment and sort data values into bins or
32
- buckets of data. This function is also useful for going from a continuous
33
- variable to a categorical variable. For example, `cut` could convert ages
34
- to groups of age ranges.
31
+ This function splits `x` into the specified number of equal-width,
32
+ half-open bins. Based on the parameters specified and the input, it returns
33
+ information about the half-open bins to which each value of `x` belongs
34
+ or the bins themselves.
35
+ Use `cut` when you need to segment and sort data values into bins. This
36
+ function is also useful for going from a continuous variable to a
37
+ categorical variable. For example, `cut` could convert ages to groups
38
+ of age ranges.
35
39
36
40
Parameters
37
41
----------
38
42
x : array-like
39
- Input array to be binned. It has to be 1-dimensional.
43
+ The input array to be binned. Must be 1-dimensional.
40
44
bins : int, sequence of scalars, or pandas.IntervalIndex
41
- If `bins` is an int, defines the number of equal-width bins in the
42
- range of `x`. The range of `x` is extended by .1% on each side to
43
- include the min or max values of `x`.
44
- If `bins` is a sequence, defines the bin edges allowing for
45
- non-uniform bin width. No extension of the range of `x` is done.
46
- right : bool, optional, default 'True'
45
+ If int, defines the number of equal-width bins in the range of `x`.
46
+ The range of `x` is extended by .1% on each side to include the min or
47
+ max values of `x`.
48
+ If a sequence, defines the bin edges allowing for non-uniform width.
49
+ No extension of the range of `x` is done.
50
+ right : bool, default 'True'
47
51
Indicates whether the `bins` include the rightmost edge or not. If
48
52
`right == True` (the default), then the `bins` [1,2,3,4] indicate
49
53
(1,2], (2,3], (3,4].
50
54
labels : array or bool, optional
51
- Used as labels for the resulting ` bins` . Must be of the same length as
52
- the resulting ` bins` . If False, returns only integer indicators of the
53
- ` bins` .
54
- retbins : bool, optional, default 'False'
55
- Whether to return the ` bins` or not. Useful when ` bins` is provided
55
+ Specifies the labels for the returned bins. Must be the same length as
56
+ the resulting bins. If False, returns only integer indicators of the
57
+ bins.
58
+ retbins : bool, default 'False'
59
+ Whether to return the bins or not. Useful when `bins` is provided
56
60
as a scalar.
57
- precision : int, optional, default '3'
58
- The precision at which to store and display the ` bins` labels.
59
- include_lowest : bool, optional, default 'False'
61
+ precision : int, default '3'
62
+ The precision at which to store and display the bin labels.
63
+ include_lowest : bool, default 'False'
60
64
Whether the first interval should be left-inclusive or not.
61
65
62
66
Returns
63
67
-------
64
68
out : pandas.Categorical or Series, or array of int if `labels` is 'False'
65
69
The return type depends on the input.
66
70
If the input is a Series, a Series of type category is returned.
67
- Else - pandas.Categorical is returned. ` Bins` are represented as
71
+ Otherwise, a pandas.Categorical is returned. Bins are represented as
68
72
categories when categorical data is returned.
69
73
bins : numpy.ndarray of floats
70
- Returned only if `retbins` is 'True'.
74
+ Returned when `retbins` is 'True'.
71
75
72
76
See Also
73
77
--------
@@ -87,10 +91,16 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
87
91
88
92
Examples
89
93
--------
90
- >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True)
94
+ >>> pd.cut(np.array([1,7,5,4,6,3]), 3)
95
+ ... # doctest: +ELLIPSIS
96
+ [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
97
+ Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
98
+
99
+ >>> pd.cut(np.array([1,7,5,4,6,3]), 3, retbins=True)
91
100
... # doctest: +ELLIPSIS
92
- ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], ...
93
- Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] ...
101
+ ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
102
+ Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
103
+ array([0.994, 3. , 5. , 7. ]))
94
104
95
105
>>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]),
96
106
... 3, labels=["good", "medium", "bad"])
@@ -100,6 +110,17 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
100
110
101
111
>>> pd.cut(np.ones(5), 4, labels=False)
102
112
array([1, 1, 1, 1, 1], dtype=int64)
113
+
114
+ >>> s = pd.Series(np.array([2,4,6,8,10]), index=['a', 'b', 'c', 'd', 'e'])
115
+ >>> pd.cut(s, 3)
116
+ ... # doctest: +ELLIPSIS
117
+ a (1.992, 4.667]
118
+ b (1.992, 4.667]
119
+ c (4.667, 7.333]
120
+ d (7.333, 10.0]
121
+ e (7.333, 10.0]
122
+ dtype: category
123
+ Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ...
103
124
"""
104
125
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
105
126
0 commit comments