|
9 | 9 | from numpy import nan
|
10 | 10 |
|
11 | 11 | from pandas import (date_range, bdate_range, Timestamp,
|
12 |
| - isnull, Index, MultiIndex, DataFrame, Series, |
| 12 | + Index, MultiIndex, DataFrame, Series, |
13 | 13 | concat, Panel, DatetimeIndex)
|
14 | 14 | from pandas.errors import UnsupportedFunctionCall, PerformanceWarning
|
15 | 15 | from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
|
@@ -87,229 +87,6 @@ def test_select_bad_cols(self):
|
87 | 87 | # will have to rethink regex if you change message!
|
88 | 88 | g[['A', 'C']]
|
89 | 89 |
|
90 |
def test_first_last_nth(self):
    """Exercise first / last / nth on ``self.df`` grouped by column 'A'.

    Each result is compared with rows picked from ``self.df`` via ``.loc``
    and relabelled with the group keys 'bar' / 'foo'.
    """
    cols = ['B', 'C', 'D']
    by_a = self.df.groupby('A')

    # first() and nth(0) are checked against the same expected frame
    want = self.df.loc[[1, 0], cols]
    want.index = Index(['bar', 'foo'], name='A')
    want = want.sort_index()
    assert_frame_equal(by_a.first(), want)
    assert_frame_equal(by_a.nth(0), want)

    # last() and nth(-1) are checked against the same expected frame
    want = self.df.loc[[5, 7], cols]
    want.index = Index(['bar', 'foo'], name='A')
    assert_frame_equal(by_a.last(), want)
    assert_frame_equal(by_a.nth(-1), want)

    # nth(1): rows 2 and 3, labelled 'foo' / 'bar' and then sorted
    want = self.df.loc[[2, 3], cols].copy()
    want.index = Index(['foo', 'bar'], name='A')
    want = want.sort_index()
    assert_frame_equal(by_a.nth(1), want)

    # it works!  (smoke test on the single-column selection)
    by_a['B'].first()
    by_a['B'].last()
    by_a['B'].nth(0)

    # blank out B for the 'foo' rows; every reduction must then be null
    self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan
    self.assertTrue(isnull(by_a['B'].first()['foo']))
    self.assertTrue(isnull(by_a['B'].last()['foo']))
    self.assertTrue(isnull(by_a['B'].nth(0)['foo']))

    # v0.14.0 whatsnew
    frame = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    by_key = frame.groupby('A')
    want = frame.iloc[[1, 2]].set_index('A')
    assert_frame_equal(by_key.first(), want)

    want = frame.iloc[[1, 2]].set_index('A')
    assert_frame_equal(by_key.nth(0, dropna='any'), want)
137 |
def test_first_last_nth_dtypes(self):
    """Run first / last / nth on a copy of ``self.df_mixed_floats``.

    A bool column 'E' and an int column 'F' are added to the copy before
    grouping by 'A'; a Series-level check for GH 2763 follows.
    """
    frame = self.df_mixed_floats.copy()
    frame['E'] = True
    frame['F'] = 1
    cols = ['B', 'C', 'D', 'E', 'F']

    def expected_for(rows):
        # rows are listed in ('bar', 'foo') order, then sorted by group key
        picked = frame.loc[rows, cols]
        picked.index = Index(['bar', 'foo'], name='A')
        return picked.sort_index()

    # tests for first / last / nth
    by_a = frame.groupby('A')
    assert_frame_equal(by_a.first(), expected_for([1, 0]))
    assert_frame_equal(by_a.last(), expected_for([5, 7]))
    assert_frame_equal(by_a.nth(1), expected_for([3, 2]))

    # GH 2763, first/last shifting dtypes
    labels = lrange(10)
    labels.append(9)  # duplicate the last label so one group has two rows
    ser = Series(data=lrange(11), index=labels, name='IntCol')
    self.assertEqual(ser.dtype, 'int64')
    reduced = ser.groupby(level=0).first()
    self.assertEqual(reduced.dtype, 'int64')
171 |
def test_nth(self):
    """Check ``GroupBy.nth`` for scalar, negative, list and dropna arguments.

    Also covers the out-of-bounds regression (GH 6621), agreement with
    ``first`` (GH 7559, GH 7287) and selection of several positions at
    once on a business-day index.
    """
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')

    assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
    assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
    assert_frame_equal(g.nth(2), df.loc[[]].set_index('A'))
    assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
    assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
    assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A'))
    assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]])
    assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]])
    assert_frame_equal(g[['B']].nth(0),
                       df.loc[[0, 2], ['A', 'B']].set_index('A'))

    exp = df.set_index('A')
    assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])
    assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]])

    # with dropna, positions past the end of a group are expected as NaN
    exp['B'] = np.nan
    assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]])
    assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]])

    # out of bounds, regression from 0.13.1
    # GH 6621
    df = DataFrame({'color': {0: 'green',
                              1: 'green',
                              2: 'red',
                              3: 'red',
                              4: 'red'},
                    'food': {0: 'ham',
                             1: 'eggs',
                             2: 'eggs',
                             3: 'ham',
                             4: 'pork'},
                    'two': {0: 1.5456590000000001,
                            1: -0.070345000000000005,
                            2: -2.4004539999999999,
                            3: 0.46206000000000003,
                            4: 0.52350799999999997},
                    'one': {0: 0.56573799999999996,
                            1: -0.9742360000000001,
                            2: 1.033801,
                            3: -0.78543499999999999,
                            4: 0.70422799999999997}}).set_index(['color',
                                                                 'food'])

    result = df.groupby(level=0, as_index=False).nth(2)
    expected = df.iloc[[-1]]
    assert_frame_equal(result, expected)

    result = df.groupby(level=0, as_index=False).nth(3)
    expected = df.loc[[]]
    assert_frame_equal(result, expected)

    # GH 7559
    # from the vbench
    df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64')
    s = df[1]
    g = df[0]
    expected = s.groupby(g).first()
    expected2 = s.groupby(g).apply(lambda x: x.iloc[0])
    assert_series_equal(expected2, expected, check_names=False)
    # The former ``self.assertTrue(expected.name, 0)`` was dropped: its
    # second argument is the failure message, so it only tested truthiness
    # and contradicted the real check below.
    self.assertEqual(expected.name, 1)

    # validate first
    v = s[g == 1].iloc[0]
    self.assertEqual(expected.iloc[0], v)
    self.assertEqual(expected2.iloc[0], v)

    # this is NOT the same as .first (as sorted is default!)
    # as it keeps the order in the series (and not the group order)
    # related GH 7287
    expected = s.groupby(g, sort=False).first()
    result = s.groupby(g, sort=False).nth(0, dropna='all')
    assert_series_equal(result, expected)

    # doc example
    df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')
    result = g.B.nth(0, dropna=True)
    expected = g.B.first()
    assert_series_equal(result, expected)

    # test multiple nth values
    df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]],
                   columns=['A', 'B'])
    g = df.groupby('A')

    assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A'))
    assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A'))
    assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
    assert_frame_equal(
        g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
    assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A'))
    assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A'))

    business_dates = pd.date_range(start='4/1/2014', end='6/30/2014',
                                   freq='B')
    df = DataFrame(1, index=business_dates, columns=['a', 'b'])
    # get the first, fourth and last two business days for each month
    key = (df.index.year, df.index.month)
    result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
    expected_dates = pd.to_datetime(
        ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
         '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5',
         '2014/6/27', '2014/6/30'])
    expected = DataFrame(1, columns=['a', 'b'], index=expected_dates)
    assert_frame_equal(result, expected)
286 |
def test_nth_multi_index(self):
    """nth(0) on ``self.three_group`` grouped by ['A', 'B'] equals first()."""
    # PR 9090, related to issue 8979
    by_ab = self.three_group.groupby(['A', 'B'])
    assert_frame_equal(by_ab.nth(0), by_ab.first())
294 |
def test_nth_multi_index_as_expected(self):
    """Compare nth(0) of a two-key groupby with a hand-built MultiIndex frame."""
    # PR 9090, related to issue 8979
    frame = DataFrame({
        'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
              'foo', 'foo', 'foo'],
        'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
              'two', 'two', 'one'],
        'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
              'dull', 'shiny', 'shiny', 'shiny'],
    })
    got = frame.groupby(['A', 'B']).nth(0)

    # one row per (A, B) pair, each holding 'dull' in column C
    want_index = MultiIndex.from_arrays(
        [['bar', 'bar', 'foo', 'foo'],
         ['one', 'two', 'one', 'two']],
        names=['A', 'B'])
    want = DataFrame({'C': ['dull'] * 4}, index=want_index)
    assert_frame_equal(got, want)
313 | 90 | def test_group_selection_cache(self):
|
314 | 91 | # GH 12839 nth, head, and tail should return same result consistently
|
315 | 92 | df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
|
|
0 commit comments