pandas-dev · jreback · Mar 17, 2020 · Mar 16, 2020 · Mar 17, 2020
diff --git a/pandas/tests/base/common.py b/pandas/tests/base/common.py
@@ -0,0 +1,9 @@
+from typing import Any
+
+from pandas import Index
+
+
+def allow_na_ops(obj: Any) -> bool:
+    """Whether NaN test cases can run on obj; False means the caller should skip."""
+    is_bool_index = isinstance(obj, Index) and obj.is_boolean()  # always excluded
+    return not is_bool_index and obj._can_hold_na
diff --git a/pandas/tests/base/test_drop_duplicates.py b/pandas/tests/base/test_drop_duplicates.py
@@ -0,0 +1,30 @@
+from datetime import datetime
+
+import numpy as np
+
+import pandas as pd
+import pandas._testing as tm
+
+
+def test_drop_duplicates_series_vs_dataframe():
+    # GH 14192: Series.drop_duplicates must match single-column DataFrame result
+    df = pd.DataFrame(
+        {
+            "a": [1, 1, 1, "one", "one"],  # mixed int/str
+            "b": [2, 2, np.nan, np.nan, np.nan],  # ints with NaN
+            "c": [3, 3, np.nan, np.nan, "three"],  # int/str mix with NaN
+            "d": [1, 2, 3, 4, 4],  # plain ints
+            "e": [  # datetimes with NaT
+                datetime(2015, 1, 1),
+                datetime(2015, 1, 1),
+                datetime(2015, 2, 1),
+                pd.NaT,
+                pd.NaT,
+            ],
+        }
+    )
+    for column in df.columns:
+        for keep in ["first", "last", False]:
+            dropped_frame = df[[column]].drop_duplicates(keep=keep)  # 1-col frame
+            dropped_series = df[column].drop_duplicates(keep=keep)
+            tm.assert_frame_equal(dropped_frame, dropped_series.to_frame())
diff --git a/pandas/tests/base/test_factorize.py b/pandas/tests/base/test_factorize.py
@@ -0,0 +1,28 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize("sort", [True, False])
+def test_factorize(index_or_series_obj, sort):
+    obj = index_or_series_obj  # Index or Series under test
+    result_codes, result_uniques = obj.factorize(sort=sort)
+
+    constructor = pd.Index
+    if isinstance(obj, pd.MultiIndex):  # MultiIndex is rebuilt from tuples
+        constructor = pd.MultiIndex.from_tuples
+    expected_uniques = constructor(obj.unique())
+
+    if sort:  # sort=True is expected to return sorted uniques
+        expected_uniques = expected_uniques.sort_values()
+
+    # construct an integer ndarray of positions within the uniques so that
+    # `expected_uniques.take(expected_codes)` is equal to `obj`
+    expected_uniques_list = list(expected_uniques)
+    expected_codes = [expected_uniques_list.index(val) for val in obj]
+    expected_codes = np.asarray(expected_codes, dtype=np.intp)
+
+    tm.assert_numpy_array_equal(result_codes, expected_codes)
+    tm.assert_index_equal(result_uniques, expected_uniques)
diff --git a/pandas/tests/base/test_fillna.py b/pandas/tests/base/test_fillna.py
@@ -0,0 +1,70 @@
+"""
+Though Index.fillna and Series.fillna have separate implementations,
+test here to confirm that they behave the same
+"""
+
+import numpy as np
+import pytest
+
+from pandas._libs.tslib import iNaT
+
+from pandas.core.dtypes.common import needs_i8_conversion
+from pandas.core.dtypes.generic import ABCMultiIndex
+
+from pandas import Index
+import pandas._testing as tm
+from pandas.tests.base.common import allow_na_ops
+
+
+def test_fillna(index_or_series_obj):
+    # GH 11343: filling with a value already present in obj must change nothing
+    obj = index_or_series_obj
+    if isinstance(obj, ABCMultiIndex):
+        pytest.skip("MultiIndex doesn't support isna")
+
+    # fill with obj's own first value (0 if empty): values will not be changed
+    fill_value = obj.values[0] if len(obj) > 0 else 0
+    result = obj.fillna(fill_value)
+    if isinstance(obj, Index):
+        tm.assert_index_equal(obj, result)
+    else:
+        tm.assert_series_equal(obj, result)
+
+    # fillna must hand back a new object rather than obj itself
+    assert obj is not result
+
+
+@pytest.mark.parametrize("null_obj", [np.nan, None])
+def test_fillna_null(null_obj, index_or_series_obj):
+    # GH 11343: fillna must replace injected nulls with the fill value
+    obj = index_or_series_obj
+    klass = type(obj)
+
+    if not allow_na_ops(obj):
+        pytest.skip(f"{klass} doesn't allow for NA operations")
+    elif len(obj) < 1:
+        pytest.skip("Test doesn't make sense on empty data")
+    elif isinstance(obj, ABCMultiIndex):
+        pytest.skip(f"MultiIndex can't hold '{null_obj}'")
+
+    # Work on a copy: obj.values may be a view of the fixture's data, and the
+    # slice assignment below would otherwise write nulls into it in place.
+    values = obj.values.copy()
+    fill_value = values[0]
+    expected = values.copy()
+    # data needing i8 conversion gets iNaT injected instead of null_obj
+    values[0:2] = iNaT if needs_i8_conversion(obj) else null_obj
+    # both leading slots are expected to end up holding fill_value
+    expected[0:2] = fill_value
+
+    expected = klass(expected)
+    obj = klass(values)
+
+    result = obj.fillna(fill_value)
+    if isinstance(obj, Index):
+        tm.assert_index_equal(result, expected)
+    else:
+        tm.assert_series_equal(result, expected)
+
+    # fillna must hand back a new object rather than obj itself
+    assert obj is not result
diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py
@@ -0,0 +1,204 @@
+import sys
+
+import numpy as np
+import pytest
+
+from pandas.compat import PYPY
+
+from pandas.core.dtypes.common import (
+    is_categorical_dtype,
+    is_datetime64_dtype,
+    is_datetime64tz_dtype,
+    is_object_dtype,
+)
+
+import pandas as pd
+from pandas import DataFrame, Index, IntervalIndex, Series
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "op_name, op",
+    [
+        ("add", "+"),
+        ("sub", "-"),
+        ("mul", "*"),
+        ("mod", "%"),
+        ("pow", "**"),
+        ("truediv", "/"),
+        ("floordiv", "//"),
+    ],
+)
+@pytest.mark.parametrize("klass", [Series, DataFrame])
+def test_binary_ops_docstring(klass, op_name, op):
+    # not using the all_arithmetic_functions fixture with _get_opstr
+    # as _get_opstr is used internally in the dynamic implementation of the docstring
+    operand1 = klass.__name__.lower()  # "series" or "dataframe"
+    operand2 = "other"
+    expected_str = " ".join([operand1, op, operand2])  # e.g. "series + other"
+    assert expected_str in getattr(klass, op_name).__doc__
+
+    # reverse version of the binary ops ("r" + op_name) swaps the operands
+    expected_str = " ".join([operand2, op, operand1])  # e.g. "other + series"
+    assert expected_str in getattr(klass, "r" + op_name).__doc__
+
+
+def test_none_comparison(series_with_simple_index):
+    series = series_with_simple_index
+    if isinstance(series.index, IntervalIndex):
+        # IntervalIndex breaks on "series[0] = np.nan" below
+        pytest.skip("IntervalIndex doesn't support assignment")
+    if len(series) < 1:  # NOTE(review): iat[1] below needs len >= 2 -- confirm
+        pytest.skip("Test doesn't make sense on empty data")
+
+    # bug brought up by #1079: ==/!= against None must return boolean results
+    # changed from TypeError in 0.17.0
+    series[0] = np.nan  # inject a missing value
+
+    # noinspection PyComparisonWithNone
+    result = series == None  # noqa
+    assert not result.iat[0]
+    assert not result.iat[1]
+
+    # noinspection PyComparisonWithNone
+    result = series != None  # noqa
+    assert result.iat[0]
+    assert result.iat[1]
+
+    result = None == series  # noqa
+    assert not result.iat[0]
+    assert not result.iat[1]
+
+    result = None != series  # noqa
+    assert result.iat[0]
+    assert result.iat[1]
+
+    if is_datetime64_dtype(series) or is_datetime64tz_dtype(series):
+        # Following DatetimeIndex (and Timestamp) convention,
+        # inequality comparisons with Series[datetime64] raise
+        msg = "Invalid comparison"
+        with pytest.raises(TypeError, match=msg):
+            None > series
+        with pytest.raises(TypeError, match=msg):
+            series > None
+    else:  # ordering comparisons against None are False, not an error
+        result = None > series
+        assert not result.iat[0]
+        assert not result.iat[1]
+
+        result = series < None
+        assert not result.iat[0]
+        assert not result.iat[1]
+
+
+def test_ndarray_compat_properties(index_or_series_obj):
+    obj = index_or_series_obj
+
+    # ndarray-style properties that must exist and be non-None
+    for p in ["shape", "dtype", "T", "nbytes"]:
+        assert getattr(obj, p, None) is not None
+
+    # deprecated properties must no longer be exposed as attributes
+    for p in ["flags", "strides", "itemsize", "base", "data"]:
+        assert not hasattr(obj, p)
+
+    msg = "can only convert an array of size 1 to a Python scalar"
+    with pytest.raises(ValueError, match=msg):
+        obj.item()  # only size-1 objects convert to a scalar
+
+    assert obj.ndim == 1
+    assert obj.size == len(obj)
+
+    assert Index([1]).item() == 1  # size-1 objects return their scalar
+    assert Series([1]).item() == 1
+
+
+@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
+def test_memory_usage(index_or_series_obj):
+    obj = index_or_series_obj
+    res = obj.memory_usage()  # default (non-deep) call
+    res_deep = obj.memory_usage(deep=True)
+
+    is_object = is_object_dtype(obj) or (
+        isinstance(obj, Series) and is_object_dtype(obj.index)
+    )
+    is_categorical = is_categorical_dtype(obj) or (
+        isinstance(obj, Series) and is_categorical_dtype(obj.index)
+    )
+
+    if len(obj) == 0:  # empty data is expected to report zero either way
+        assert res_deep == res == 0
+    elif is_object or is_categorical:
+        # only deep=True will pick up object/categorical data (values or index)
+        assert res_deep > res
+    else:
+        assert res == res_deep
+
+    # sys.getsizeof will call the .memory_usage with
+    # deep=True, and add on some GC overhead
+    diff = res_deep - sys.getsizeof(obj)
+    assert abs(diff) < 100  # allowance for that overhead
+
+
+def test_memory_usage_components_series(series_with_simple_index):
+    series = series_with_simple_index
+    total_usage = series.memory_usage(index=True)  # index included
+    non_index_usage = series.memory_usage(index=False)  # index excluded
+    index_usage = series.index.memory_usage()
+    assert total_usage == non_index_usage + index_usage  # parts must sum to total
+
+
+def test_memory_usage_components_narrow_series(narrow_series):
+    series = narrow_series  # presumably a narrow-dtype Series fixture -- verify
+    total_usage = series.memory_usage(index=True)  # index included
+    non_index_usage = series.memory_usage(index=False)  # index excluded
+    index_usage = series.index.memory_usage()
+    assert total_usage == non_index_usage + index_usage  # parts must sum to total
+
+
+def test_searchsorted(index_or_series_obj):
+    # numpy.searchsorted calls obj.searchsorted under the hood.
+    # See gh-12238
+    obj = index_or_series_obj
+
+    if isinstance(obj, pd.MultiIndex):
+        # See gh-14833
+        pytest.skip("np.searchsorted doesn't work on pd.MultiIndex")
+
+    max_obj = max(obj, default=0)  # default=0 covers an empty obj
+    index = np.searchsorted(obj, max_obj)
+    assert 0 <= index <= len(obj)  # any valid insertion position is accepted
+
+    index = np.searchsorted(obj, max_obj, sorter=range(len(obj)))  # identity sorter
+    assert 0 <= index <= len(obj)
+
+
+def test_access_by_position(indices):
+    index = indices
+
+    if len(index) == 0:
+        pytest.skip("Test doesn't make sense on empty data")
+    elif isinstance(index, pd.MultiIndex):
+        pytest.skip("Can't instantiate Series from MultiIndex")
+
+    series = pd.Series(index)  # Series holding the index's values
+    assert index[0] == series.iloc[0]
+    assert index[5] == series.iloc[5]  # NOTE(review): requires len(index) >= 6
+    assert index[-1] == series.iloc[-1]
+
+    size = len(index)
+    assert index[-1] == index[size - 1]  # -1 addresses the last position
+
+    msg = f"index {size} is out of bounds for axis 0 with size {size}"
+    with pytest.raises(IndexError, match=msg):
+        index[size]  # one past the end
+    msg = "single positional indexer is out-of-bounds"
+    with pytest.raises(IndexError, match=msg):
+        series.iloc[size]  # one past the end
+
+
+def test_get_indexer_non_unique_dtype_mismatch():
+    # GH 25459: no label matches, so indexer is [-1] and missing is [0]
+    indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0]))
+    tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
+    tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing)