-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Ensure valid Block mutation in SeriesBinGrouper. #32561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
5007426
ad746ba
922b30d
f63acd3
7e49bd5
26ecad8
cb5d20f
4649033
7060dd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
|
||
from pandas.core.dtypes.common import ensure_int64 | ||
|
||
import pandas as pd | ||
from pandas import Index, Series, isna | ||
import pandas._testing as tm | ||
|
||
|
@@ -51,6 +52,32 @@ def test_series_bin_grouper(): | |
tm.assert_almost_equal(counts, exp_counts) | ||
|
||
|
||
def assert_block_lengths(x): | ||
assert len(x) == len(x._data.blocks[0].mgr_locs) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If this fails, does the assertion error bubble up? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah. diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index 152086c241..b6518c1962 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -53,7 +53,7 @@ def test_series_bin_grouper():
def assert_block_lengths(x):
- assert len(x) == len(x._data.blocks[0].mgr_locs)
+ assert len(x) == len(x._data.blocks[0].mgr_locs) + 1
return 0 ___________________________________________________________________ test_mgr_locs_updated[assert_block_lengths] ____________________________________________________________________
func = <function assert_block_lengths at 0x122354b90>
@pytest.mark.parametrize("func", [cumsum_max, assert_block_lengths])
def test_mgr_locs_updated(func):
# https://github.com/pandas-dev/pandas/issues/31802
# Some operations may require creating new blocks, which requires
# valid mgr_locs
df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]})
> result = df.groupby(["A", "B"]).agg(func)
pandas/tests/groupby/test_bin_groupby.py:71:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/groupby/generic.py:939: in aggregate
return self._python_agg_general(func, *args, **kwargs)
pandas/core/groupby/groupby.py:926: in _python_agg_general
result, counts = self.grouper.agg_series(obj, f)
pandas/core/groupby/ops.py:640: in agg_series
return self._aggregate_series_fast(obj, func)
pandas/core/groupby/ops.py:665: in _aggregate_series_fast
result, counts = grouper.get_result()
pandas/_libs/reduction.pyx:377: in pandas._libs.reduction.SeriesGrouper.get_result
res, initialized = self._apply_to_group(cached_typ, cached_ityp,
pandas/_libs/reduction.pyx:195: in pandas._libs.reduction._BaseGrouper._apply_to_group
res = self.f(cached_typ)
pandas/core/groupby/groupby.py:913: in <lambda>
f = lambda x: func(x, *args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
x = Series([], Name: C, dtype: int64)
def assert_block_lengths(x):
> assert len(x) == len(x._data.blocks[0].mgr_locs) + 1
E assert 1 == (1 + 1)
E + where 1 = len(0 1\nName: C, dtype: int64)
E + and 1 = len(BlockPlacement(slice(0, 1, 1)))
E + where BlockPlacement(slice(0, 1, 1)) = IntBlock: 1 dtype: int64.mgr_locs
pandas/tests/groupby/test_bin_groupby.py:56: AssertionError
==================================================================== 1 failed, 1 passed, 9 deselected in 0.24s ===================================================================== |
||
return 0 | ||
|
||
|
||
def cumsum_max(x): | ||
x.cumsum().max() # triggers the ValueError when creating a block | ||
return 0 | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
@pytest.mark.parametrize( | ||
"func", | ||
[ | ||
cumsum_max, | ||
pytest.param(assert_block_lengths, marks=pytest.mark.xfail(reason="debatable")), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Currently we just catch ValueError in https://github.com/pandas-dev/pandas/pull/32561/files#diff-8c0985a9fca770c2028bed688dfc043fR641. "Fixing" this would essentially require an |
||
], | ||
) | ||
def test_operation_on_invalid_block_passes(func): | ||
# https://github.com/pandas-dev/pandas/issues/31802 | ||
# SeriesBinGrouper creates an invalid block, which may | ||
# raise arbitrary exceptions. | ||
df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]}) | ||
result = df.groupby(["A", "B"]).agg(func) | ||
assert isinstance(result, pd.DataFrame) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"binner,closed,expected", | ||
[ | ||
|
Uh oh!
There was an error while loading. Please reload this page.