Skip to content

CLN: Split pandas/tests/base/test_ops.py #32744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pandas/tests/base/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import Any

from pandas import Index


def allow_na_ops(obj: Any) -> bool:
    """
    Return whether NA-related test cases can be run against ``obj``.

    Parameters
    ----------
    obj : Any
        Object to inspect. It must expose ``_can_hold_na``, which is read
        directly.

    Returns
    -------
    bool
        False for a boolean ``Index`` or when ``obj._can_hold_na`` is falsy,
        in which case callers skip their NaN cases; True otherwise.
    """
    # Equivalent to ``Index.is_boolean()``, spelled via ``inferred_type``
    # because ``is_boolean`` is deprecated since pandas 2.0.
    is_bool_index = isinstance(obj, Index) and obj.inferred_type == "boolean"
    return not is_bool_index and obj._can_hold_na
30 changes: 30 additions & 0 deletions pandas/tests/base/test_drop_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from datetime import datetime

import numpy as np

import pandas as pd
import pandas._testing as tm


def test_drop_duplicates_series_vs_dataframe():
    """
    Compare ``drop_duplicates`` on one-column frames and on Series.

    Each column of a mixed-dtype frame is de-duplicated twice, once as a
    single-column DataFrame and once as a Series, for every ``keep`` option;
    both results must be equal after converting the Series to a frame.
    """
    # GH 14192
    jan_first = datetime(2015, 1, 1)
    data = {
        "a": [1, 1, 1, "one", "one"],
        "b": [2, 2, np.nan, np.nan, np.nan],
        "c": [3, 3, np.nan, np.nan, "three"],
        "d": [1, 2, 3, 4, 4],
        "e": [jan_first, jan_first, datetime(2015, 2, 1), pd.NaT, pd.NaT],
    }
    df = pd.DataFrame(data)
    keep_options = ("first", "last", False)

    for column in df.columns:
        single_column_frame = df[[column]]
        series = df[column]
        for keep in keep_options:
            from_frame = single_column_frame.drop_duplicates(keep=keep)
            from_series = series.drop_duplicates(keep=keep).to_frame()
            tm.assert_frame_equal(from_frame, from_series)
28 changes: 28 additions & 0 deletions pandas/tests/base/test_factorize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


@pytest.mark.parametrize("sort", [True, False])
def test_factorize(index_or_series_obj, sort):
obj = index_or_series_obj
result_codes, result_uniques = obj.factorize(sort=sort)

constructor = pd.Index
if isinstance(obj, pd.MultiIndex):
constructor = pd.MultiIndex.from_tuples
expected_uniques = constructor(obj.unique())

if sort:
expected_uniques = expected_uniques.sort_values()

# construct an integer ndarray so that
# `expected_uniques.take(expected_codes)` is equal to `obj`
expected_uniques_list = list(expected_uniques)
expected_codes = [expected_uniques_list.index(val) for val in obj]
expected_codes = np.asarray(expected_codes, dtype=np.intp)

tm.assert_numpy_array_equal(result_codes, expected_codes)
tm.assert_index_equal(result_uniques, expected_uniques)
70 changes: 70 additions & 0 deletions pandas/tests/base/test_fillna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""
Though Index.fillna and Series.fillna have separate implementations,
the tests here confirm that they behave the same way.
"""

import numpy as np
import pytest

from pandas._libs.tslib import iNaT

from pandas.core.dtypes.common import needs_i8_conversion
from pandas.core.dtypes.generic import ABCMultiIndex

from pandas import Index
import pandas._testing as tm
from pandas.tests.base.common import allow_na_ops


def test_fillna(index_or_series_obj):
# GH 11343
obj = index_or_series_obj
if isinstance(obj, ABCMultiIndex):
pytest.skip("MultiIndex doesn't support isna")

# values will not be changed
fill_value = obj.values[0] if len(obj) > 0 else 0
result = obj.fillna(fill_value)
if isinstance(obj, Index):
tm.assert_index_equal(obj, result)
else:
tm.assert_series_equal(obj, result)

# check shallow_copied
assert obj is not result


@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_fillna_null(null_obj, index_or_series_obj):
# GH 11343
obj = index_or_series_obj
klass = type(obj)

if not allow_na_ops(obj):
pytest.skip(f"{klass} doesn't allow for NA operations")
elif len(obj) < 1:
pytest.skip("Test doesn't make sense on empty data")
elif isinstance(obj, ABCMultiIndex):
pytest.skip(f"MultiIndex can't hold '{null_obj}'")

values = obj.values
fill_value = values[0]
expected = values.copy()
if needs_i8_conversion(obj):
values[0:2] = iNaT
expected[0:2] = fill_value
else:
values[0:2] = null_obj
expected[0:2] = fill_value

expected = klass(expected)
obj = klass(values)

result = obj.fillna(fill_value)
if isinstance(obj, Index):
tm.assert_index_equal(result, expected)
else:
tm.assert_series_equal(result, expected)

# check shallow_copied
assert obj is not result
204 changes: 204 additions & 0 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import sys

import numpy as np
import pytest

from pandas.compat import PYPY

from pandas.core.dtypes.common import (
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_object_dtype,
)

import pandas as pd
from pandas import DataFrame, Index, IntervalIndex, Series
import pandas._testing as tm


@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
@pytest.mark.parametrize("klass", [Series, DataFrame])
def test_binary_ops_docstring(klass, op_name, op):
    """
    Check that flex arithmetic docstrings mention the operator expression.

    The docstring of ``klass.<op_name>`` must contain
    ``"<klass> <op> other"`` and the docstring of the reflected method
    ``klass.r<op_name>`` must contain ``"other <op> <klass>"``, where
    ``<klass>`` is the lower-cased class name.
    """
    # not using the all_arithmetic_functions fixture with _get_opstr
    # as _get_opstr is used internally in the dynamic implementation of the docstring
    this = klass.__name__.lower()
    other = "other"

    forward_doc = getattr(klass, op_name).__doc__
    assert f"{this} {op} {other}" in forward_doc

    # reverse version of the binary ops
    reverse_doc = getattr(klass, f"r{op_name}").__doc__
    assert f"{other} {op} {this}" in reverse_doc


def test_none_comparison(series_with_simple_index):
series = series_with_simple_index
if isinstance(series.index, IntervalIndex):
# IntervalIndex breaks on "series[0] = np.nan" below
pytest.skip("IntervalIndex doesn't support assignment")
if len(series) < 1:
pytest.skip("Test doesn't make sense on empty data")

# bug brought up by #1079
# changed from TypeError in 0.17.0
series[0] = np.nan

# noinspection PyComparisonWithNone
result = series == None # noqa
assert not result.iat[0]
assert not result.iat[1]

# noinspection PyComparisonWithNone
result = series != None # noqa
assert result.iat[0]
assert result.iat[1]

result = None == series # noqa
assert not result.iat[0]
assert not result.iat[1]

result = None != series # noqa
assert result.iat[0]
assert result.iat[1]

if is_datetime64_dtype(series) or is_datetime64tz_dtype(series):
# Following DatetimeIndex (and Timestamp) convention,
# inequality comparisons with Series[datetime64] raise
msg = "Invalid comparison"
with pytest.raises(TypeError, match=msg):
None > series
with pytest.raises(TypeError, match=msg):
series > None
else:
result = None > series
assert not result.iat[0]
assert not result.iat[1]

result = series < None
assert not result.iat[0]
assert not result.iat[1]


def test_ndarray_compat_properties(index_or_series_obj):
obj = index_or_series_obj

# Check that we work.
for p in ["shape", "dtype", "T", "nbytes"]:
assert getattr(obj, p, None) is not None

# deprecated properties
for p in ["flags", "strides", "itemsize", "base", "data"]:
assert not hasattr(obj, p)

msg = "can only convert an array of size 1 to a Python scalar"
with pytest.raises(ValueError, match=msg):
obj.item() # len > 1

assert obj.ndim == 1
assert obj.size == len(obj)

assert Index([1]).item() == 1
assert Series([1]).item() == 1


@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
def test_memory_usage(index_or_series_obj):
obj = index_or_series_obj
res = obj.memory_usage()
res_deep = obj.memory_usage(deep=True)

is_object = is_object_dtype(obj) or (
isinstance(obj, Series) and is_object_dtype(obj.index)
)
is_categorical = is_categorical_dtype(obj) or (
isinstance(obj, Series) and is_categorical_dtype(obj.index)
)

if len(obj) == 0:
assert res_deep == res == 0
elif is_object or is_categorical:
# only deep will pick them up
assert res_deep > res
else:
assert res == res_deep

# sys.getsizeof will call the .memory_usage with
# deep=True, and add on some GC overhead
diff = res_deep - sys.getsizeof(obj)
assert abs(diff) < 100


def test_memory_usage_components_series(series_with_simple_index):
series = series_with_simple_index
total_usage = series.memory_usage(index=True)
non_index_usage = series.memory_usage(index=False)
index_usage = series.index.memory_usage()
assert total_usage == non_index_usage + index_usage


def test_memory_usage_components_narrow_series(narrow_series):
series = narrow_series
total_usage = series.memory_usage(index=True)
non_index_usage = series.memory_usage(index=False)
index_usage = series.index.memory_usage()
assert total_usage == non_index_usage + index_usage


def test_searchsorted(index_or_series_obj):
# numpy.searchsorted calls obj.searchsorted under the hood.
# See gh-12238
obj = index_or_series_obj

if isinstance(obj, pd.MultiIndex):
# See gh-14833
pytest.skip("np.searchsorted doesn't work on pd.MultiIndex")

max_obj = max(obj, default=0)
index = np.searchsorted(obj, max_obj)
assert 0 <= index <= len(obj)

index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))
assert 0 <= index <= len(obj)


def test_access_by_position(indices):
index = indices

if len(index) == 0:
pytest.skip("Test doesn't make sense on empty data")
elif isinstance(index, pd.MultiIndex):
pytest.skip("Can't instantiate Series from MultiIndex")

series = pd.Series(index)
assert index[0] == series.iloc[0]
assert index[5] == series.iloc[5]
assert index[-1] == series.iloc[-1]

size = len(index)
assert index[-1] == index[size - 1]

msg = f"index {size} is out of bounds for axis 0 with size {size}"
with pytest.raises(IndexError, match=msg):
index[size]
msg = "single positional indexer is out-of-bounds"
with pytest.raises(IndexError, match=msg):
series.iloc[size]


def test_get_indexer_non_unique_dtype_mismatch():
    """
    Check ``get_indexer_non_unique`` when the target holds integers and the index strings.

    Looking up ``Index([0])`` in ``Index(["A", "B"])`` must report ``-1`` as
    the only indexer entry and position ``0`` as missing.
    """
    # GH 25459
    string_index = pd.Index(["A", "B"])
    integer_target = pd.Index([0])
    indexes, missing = string_index.get_indexer_non_unique(integer_target)

    expected_indexes = np.array([-1], dtype=np.intp)
    expected_missing = np.array([0], dtype=np.int64)
    tm.assert_numpy_array_equal(expected_indexes, indexes)
    tm.assert_numpy_array_equal(expected_missing, missing)
Loading