@@ -1232,9 +1232,9 @@ def test_categorical_and_not_categorical_key(observed):
1232
1232
tm .assert_frame_equal (result , expected_explicit )
1233
1233
1234
1234
# Series case
1235
- result = df_with_categorical .groupby (["A" , "C" ], observed = observed )[ "B" ]. transform (
1236
- "sum"
1237
- )
1235
+ gb = df_with_categorical .groupby (["A" , "C" ], observed = observed )
1236
+ gbp = gb [ "B" ]
1237
+ result = gbp . transform ( "sum" )
1238
1238
expected = df_without_categorical .groupby (["A" , "C" ])["B" ].transform ("sum" )
1239
1239
tm .assert_series_equal (result , expected )
1240
1240
expected_explicit = Series ([4 , 2 , 4 ], name = "B" )
@@ -1535,3 +1535,151 @@ def test_transform_sum_one_column_with_matching_labels_and_missing_labels():
1535
1535
result = df .groupby (series , as_index = False ).transform ("sum" )
1536
1536
expected = DataFrame ({"X" : [- 93203.0 , - 93203.0 , np .nan ]})
1537
1537
tm .assert_frame_equal (result , expected )
1538
+
1539
+
1540
+ def test_min_one_unobserved_category_no_type_coercion ():
1541
+ df = DataFrame ({"A" : Categorical ([1 , 1 , 2 ], categories = [1 , 2 , 3 ]), "B" : [3 , 4 , 5 ]})
1542
+ df ["B" ] = df ["B" ].astype ("int32" )
1543
+ gb = df .groupby ("A" , observed = False )
1544
+ result = gb .transform ("min" )
1545
+
1546
+ expected = DataFrame ({"B" : [3 , 3 , 5 ]}, dtype = "int32" )
1547
+ tm .assert_frame_equal (expected , result )
1548
+ assert df ["B" ].dtype == result ["B" ].dtype
1549
+
1550
+
1551
+ def test_min_multiple_unobserved_categories_no_type_coercion ():
1552
+ df = DataFrame (
1553
+ {
1554
+ "X" : Categorical (
1555
+ ["432945" , "randomcat" , - 4325466 , "randomcat" , - 4325466 , - 4325466 ],
1556
+ categories = [
1557
+ 1 ,
1558
+ "randomcat" ,
1559
+ 100 ,
1560
+ 333 ,
1561
+ "cat43543" ,
1562
+ - 4325466 ,
1563
+ 54665 ,
1564
+ - 546767 ,
1565
+ "432945" ,
1566
+ 767076 ,
1567
+ ],
1568
+ ),
1569
+ "Y" : [0 , 940645 , np .iinfo (np .int64 ).min , 9449 , 100044444 , 40 ],
1570
+ }
1571
+ )
1572
+ df ["Y" ] = df ["Y" ].astype ("int64" )
1573
+
1574
+ gb = df .groupby ("X" , observed = False )
1575
+ result = gb .transform ("min" )
1576
+
1577
+ expected = DataFrame (
1578
+ {
1579
+ "Y" : [
1580
+ 0 ,
1581
+ 9449 ,
1582
+ np .iinfo (np .int64 ).min ,
1583
+ 9449 ,
1584
+ np .iinfo (np .int64 ).min ,
1585
+ np .iinfo (np .int64 ).min ,
1586
+ ]
1587
+ },
1588
+ dtype = "int64" ,
1589
+ )
1590
+ tm .assert_frame_equal (expected , result )
1591
+ assert df ["Y" ].dtype == result ["Y" ].dtype
1592
+
1593
+
1594
def test_min_float32_multiple_unobserved_categories_no_type_coercion():
    """Check ``transform("min")`` keeps float32 with many unobserved categories.

    Only three of the ten declared categories of ``X`` occur in the data and
    ``Y`` is float32; the transformed column is expected to stay float32.
    """
    df = DataFrame(
        {
            "X": Categorical(
                ["cat43543", -4325466, 54665, "cat43543", -4325466, 54665],
                categories=[
                    1,
                    "randomcat",
                    100,
                    333,
                    "cat43543",
                    -4325466,
                    54665,
                    -546767,
                    "432945",
                    767076,
                ],
            ),
            "Y": [
                0.3940429,
                940645.49,
                np.finfo(np.float32).min,
                9449.03333,
                100044444.403294,
                40.3020909,
            ],
        }
    )
    df["Y"] = df["Y"].astype("float32")

    gb = df.groupby("X", observed=False)
    result = gb.transform("min")

    # Rows 0-2 already hold the minimum of their group, so the expected
    # column repeats those three values for rows 3-5.
    expected = DataFrame(
        {
            "Y": [
                0.3940429,
                940645.49,
                np.finfo(np.float32).min,
                0.3940429,
                940645.49,
                np.finfo(np.float32).min,
            ]
        },
        dtype="float32",
    )
    # Computed frame first, reference second, matching the other tests.
    tm.assert_frame_equal(result, expected)
    assert df["Y"].dtype == result["Y"].dtype
1642
+
1643
+
1644
+ def test_min_all_empty_data_no_type_coercion ():
1645
+ df = DataFrame (
1646
+ {
1647
+ "X" : Categorical (
1648
+ [],
1649
+ categories = [
1650
+ 1 ,
1651
+ "randomcat" ,
1652
+ 100 ,
1653
+ 333 ,
1654
+ "cat43543" ,
1655
+ - 4325466 ,
1656
+ 54665 ,
1657
+ - 546767 ,
1658
+ "432945" ,
1659
+ 767076 ,
1660
+ ],
1661
+ ),
1662
+ "Y" : [],
1663
+ }
1664
+ )
1665
+ df ["Y" ] = df ["Y" ].astype ("int32" )
1666
+
1667
+ gb = df .groupby ("X" , observed = False )
1668
+ result = gb .transform ("min" )
1669
+
1670
+ expected = DataFrame ({"Y" : []}, dtype = "int32" )
1671
+ tm .assert_frame_equal (expected , result )
1672
+ assert df ["Y" ].dtype == result ["Y" ].dtype
1673
+
1674
+
1675
+ def test_min_one_dim_no_type_coercion ():
1676
+ df = DataFrame ({"Y" : [9435 , - 5465765 , 5055 , 0 , 954960 ]})
1677
+ df ["Y" ] = df ["Y" ].astype ("int32" )
1678
+ categories = Categorical ([1 , 2 , 2 , 5 , 1 ], categories = [1 , 2 , 3 , 4 , 5 ])
1679
+
1680
+ gb = df .groupby (categories , observed = False )
1681
+ result = gb .transform ("min" )
1682
+
1683
+ expected = DataFrame ({"Y" : [9435 , - 5465765 , - 5465765 , 0 , 9435 ]}, dtype = "int32" )
1684
+ tm .assert_frame_equal (expected , result )
1685
+ assert df ["Y" ].dtype == result ["Y" ].dtype
0 commit comments