@@ -1373,7 +1373,22 @@ def test_groupby_multidim_map(self):
1373
1373
1374
1374
@pytest .mark .parametrize ("use_flox" , [True , False ])
1375
1375
@pytest .mark .parametrize ("coords" , [np .arange (4 ), np .arange (4 )[::- 1 ], [2 , 0 , 3 , 1 ]])
1376
- def test_groupby_bins (self , coords : np .typing .ArrayLike , use_flox : bool ) -> None :
1376
+ @pytest .mark .parametrize (
1377
+ "cut_kwargs" ,
1378
+ (
1379
+ {"labels" : None , "include_lowest" : True },
1380
+ {"labels" : None , "include_lowest" : False },
1381
+ {"labels" : ["a" , "b" ]},
1382
+ {"labels" : [1.2 , 3.5 ]},
1383
+ {"labels" : ["b" , "a" ]},
1384
+ ),
1385
+ )
1386
+ def test_groupby_bins (
1387
+ self ,
1388
+ coords : np .typing .ArrayLike ,
1389
+ use_flox : bool ,
1390
+ cut_kwargs : dict ,
1391
+ ) -> None :
1377
1392
array = DataArray (
1378
1393
np .arange (4 ), dims = "dim_0" , coords = {"dim_0" : coords }, name = "a"
1379
1394
)
@@ -1384,11 +1399,10 @@ def test_groupby_bins(self, coords: np.typing.ArrayLike, use_flox: bool) -> None
1384
1399
bins = [0 , 1.5 , 5 ]
1385
1400
1386
1401
df = array .to_dataframe ()
1387
- df ["dim_0_bins" ] = pd .cut (array ["dim_0" ], bins )
1402
+ df ["dim_0_bins" ] = pd .cut (array ["dim_0" ], bins , ** cut_kwargs )
1388
1403
1389
1404
expected_df = df .groupby ("dim_0_bins" ).sum ()
1390
1405
# TODO: can't convert df with IntervalIndex to Xarray
1391
-
1392
1406
expected = (
1393
1407
expected_df .reset_index (drop = True )
1394
1408
.to_xarray ()
@@ -1397,25 +1411,55 @@ def test_groupby_bins(self, coords: np.typing.ArrayLike, use_flox: bool) -> None
1397
1411
)
1398
1412
1399
1413
with xr .set_options (use_flox = use_flox ):
1400
- actual = array .groupby_bins ("dim_0" , bins = bins ).sum ()
1414
+ actual = array .groupby_bins ("dim_0" , bins = bins , ** cut_kwargs ).sum ()
1401
1415
assert_identical (expected , actual )
1402
1416
1403
- actual = array .groupby_bins ("dim_0" , bins = bins , labels = [1.2 , 3.5 ]).sum ()
1404
- assert_identical (expected .assign_coords (dim_0_bins = [1.2 , 3.5 ]), actual )
1405
-
1406
- actual = array .groupby_bins ("dim_0" , bins = bins ).map (lambda x : x .sum ())
1417
+ actual = array .groupby_bins ("dim_0" , bins = bins , ** cut_kwargs ).map (
1418
+ lambda x : x .sum ()
1419
+ )
1407
1420
assert_identical (expected , actual )
1408
1421
1409
1422
# make sure original array dims are unchanged
1410
1423
assert len (array .dim_0 ) == 4
1411
1424
1412
- da = xr .DataArray (np .ones ((2 , 3 , 4 )))
1413
- bins = [- 1 , 0 , 1 , 2 ]
1414
- with xr .set_options (use_flox = False ):
1415
- actual = da .groupby_bins ("dim_0" , bins ).mean (...)
1416
- with xr .set_options (use_flox = True ):
1417
- expected = da .groupby_bins ("dim_0" , bins ).mean (...)
1418
- assert_allclose (actual , expected )
1425
def test_groupby_bins_ellipsis(self):
    """Binned mean over ``...`` must agree with ``use_flox`` off and on."""
    edges = [-1, 0, 1, 2]
    ones = xr.DataArray(np.ones((2, 3, 4)))

    def binned_mean(flox_enabled):
        # Run the same reduction over all dimensions under the given option.
        with xr.set_options(use_flox=flox_enabled):
            return ones.groupby_bins("dim_0", edges).mean(...)

    # Same evaluation order as before: option disabled first, then enabled.
    without_flox = binned_mean(False)
    with_flox = binned_mean(True)
    assert_allclose(without_flox, with_flox)
1433
+
1434
@pytest.mark.parametrize("use_flox", [True, False])
def test_groupby_bins_gives_correct_subset(self, use_flox: bool) -> None:
    """Compare labelled per-bin counts against hard-coded values (see GH7766)."""
    # Seeded generator: the hard-coded counts below are tied to this exact sample.
    generator = np.random.default_rng(42)
    samples = generator.normal(5, 5, 1000)
    edges = np.logspace(-4, 1, 10)
    names = [
        "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine",
    ]

    # The sampled values serve both as the data and as the "coords" coordinate.
    sample_array = xr.DataArray(samples, coords=[samples], dims=["coords"])
    grouped = sample_array.groupby_bins("coords", edges, labels=names)

    per_bin_counts = [np.nan, np.nan, 1, 1, 1, 8, 31, 104, 542]
    expected = xr.DataArray(
        per_bin_counts,
        dims="coords_bins",
        coords={"coords_bins": names},
    )

    # Only the reduction itself runs under the parametrized option.
    with xr.set_options(use_flox=use_flox):
        actual = grouped.count()
    assert_identical(actual, expected)
1419
1463
1420
1464
def test_groupby_bins_empty (self ):
1421
1465
array = DataArray (np .arange (4 ), [("x" , range (4 ))])
0 commit comments