-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Additional tests for ufunc(Series) #26951
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
0d6a663
8f46391
44e3c7e
e179913
0b1e745
9be1dff
775c2ef
bbbf269
64d8908
d1788b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
import string | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
import pandas as pd | ||
import pandas.util.testing as tm | ||
|
||
# ufuncs used to parametrize the tests in this module.
UNARY_UFUNCS = [np.positive, np.floor, np.exp]
BINARY_UFUNCS = [np.add, np.logaddexp]  # -> dunder op

# Values for the ``sparse`` parameter. The sparse (True) case is marked as
# an expected failure.
SPARSE = [
    pytest.param(True,
                 marks=pytest.mark.xfail(reason="Series.__array_ufunc__")),
    False,
]
# Test ids matching the order of SPARSE above.
SPARSE_IDS = ['sparse', 'dense']

# Values for the ``shuffle`` parameter. The shuffled (True) case is marked
# as an expected failure.
SHUFFLE = [
    pytest.param(True, marks=pytest.mark.xfail(reason="GH-26945")),
    False
]
|
||
|
||
@pytest.fixture
def arrays_for_binary_ufunc():
    """
    A pair of random, length-100 integer arrays with values in [0, 10).

    Every third element of the first array and every fourth element of the
    second array is set to 0, so both arrays contain many zeros.
    """
    a1 = np.random.randint(0, 10, 100)
    a2 = np.random.randint(0, 10, 100)
    a1[::3] = 0
    a2[::4] = 0
    return a1, a2
|
||
|
||
@pytest.mark.parametrize("ufunc", UNARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
def test_unary_ufunc(ufunc, sparse):
    # Check the invariant that
    #   ufunc(Series(array)) == Series(ufunc(array))
    # with the index and name of the input Series carried over.
    array = np.random.randint(0, 10, 10)
    array[::2] = 0
    if sparse:
        array = pd.SparseArray(array, dtype=pd.SparseDtype('int', 0))

    index = list(string.ascii_letters[:10])
    name = "name"
    series = pd.Series(array, index=index, name=name)

    result = ufunc(series)
    expected = pd.Series(ufunc(array), index=index, name=name)
    tm.assert_series_equal(result, expected)
|
||
|
||
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("shuffle", SHUFFLE)
@pytest.mark.parametrize("box_other", [True, False])
def test_binary_ufunc(ufunc, sparse, shuffle, box_other,
                      arrays_for_binary_ufunc):
    # Check the invariant that
    #   ufunc(Series(a), Series(b)) == Series(ufunc(a, b))
    # with alignment.
    #
    # Parametrizations:
    #   sparse    -- back both operands with a SparseArray instead of ndarray
    #   shuffle   -- reorder the right-hand operand before calling the ufunc
    #   box_other -- if True the right-hand operand is a Series
    #                (ufunc(series, series)); if False it is the raw array
    #                (ufunc(series, array))
    left_array, right_array = arrays_for_binary_ufunc

    if sparse:
        sparse_dtype = pd.SparseDtype('int', 0)
        left_array = pd.SparseArray(left_array, dtype=sparse_dtype)
        right_array = pd.SparseArray(right_array, dtype=sparse_dtype)

    name = "name"
    # TODO: verify name when they differ? Take the first? Drop?
    left_series = pd.Series(left_array, name=name)
    right_series = pd.Series(right_array, name=name)

    # Handle shuffling / alignment.
    # If boxing -- ufunc(series, series) -- then we don't need to shuffle
    # the other array for the expected, since we align.
    # If not boxing -- ufunc(series, array) -- then we do need to shuffle
    # the other array, since we *don't* align.
    if shuffle:
        idx = np.random.permutation(len(left_series))
        if box_other:
            # ensure we align before applying the ufunc
            right_series = right_series.take(idx)
        else:
            right_array = right_array.take(idx)

    # Pass the operand kind that ``box_other`` asks for; always passing the
    # Series would leave the ufunc(series, array) path untested.
    other = right_series if box_other else right_array

    result = ufunc(left_series, other)
    expected = pd.Series(ufunc(left_array, right_array), name=name)
    tm.assert_series_equal(result, expected)
|
||
|
||
@pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("flip", [True, False])
def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
    # Check the invariant that
    #   ufunc(Series(array), scalar) == Series(ufunc(array, scalar))
    # and, when ``flip`` is True, the same with the operands swapped:
    #   ufunc(scalar, Series(array)) == Series(ufunc(scalar, array))
    array, _ = arrays_for_binary_ufunc
    if sparse:
        array = pd.SparseArray(array)
    other = 2
    series = pd.Series(array, name="name")

    series_args = (series, other)
    array_args = (array, other)

    if flip:
        series_args = tuple(reversed(series_args))
        array_args = tuple(reversed(array_args))

    # ``result`` is the raw output of the ufunc on the Series; it is not
    # re-wrapped so that a lost name or a non-Series return is detected.
    expected = pd.Series(ufunc(*array_args), name="name")
    result = ufunc(*series_args)
    tm.assert_series_equal(result, expected)
|
||
|
||
@pytest.mark.parametrize("ufunc", [np.divmod])  # any others?
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
@pytest.mark.parametrize("shuffle", SHUFFLE)
@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle,
                                      arrays_for_binary_ufunc):
    # Check that a binary ufunc with two outputs, applied to two Series,
    # returns a tuple of Series whose elements match the ufunc applied to
    # the underlying arrays.
    a1, a2 = arrays_for_binary_ufunc

    if sparse:
        a1 = pd.SparseArray(a1, dtype=pd.SparseDtype('int', 0))
        a2 = pd.SparseArray(a2, dtype=pd.SparseDtype('int', 0))

    s1 = pd.Series(a1)
    s2 = pd.Series(a2)

    if shuffle:
        # ensure we align before applying the ufunc
        s2 = s2.sample(frac=1)

    expected = ufunc(a1, a2)
    assert isinstance(expected, tuple)

    result = ufunc(s1, s2)
    assert isinstance(result, tuple)
    tm.assert_series_equal(result[0], pd.Series(expected[0]))
    tm.assert_series_equal(result[1], pd.Series(expected[1]))
|
||
|
||
@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc):
    # Check that a unary ufunc with two outputs (np.modf), applied to a
    # Series, returns a tuple of Series that keep the input's name.
    array, _ = arrays_for_binary_ufunc

    if sparse:
        array = pd.SparseArray(array)

    series = pd.Series(array, name="name")
    result = np.modf(series)
    expected = np.modf(array)

    assert isinstance(result, tuple)
    assert isinstance(expected, tuple)

    tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
    tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))
Uh oh!
There was an error while loading. Please reload this page.