TST: Add test for where inplace (#44255)

rlvoyer · web-flow · commit 9db55a75243a · 2021-12-01T21:02:05.000-05:00
diff --git a/pandas/_testing/_hypothesis.py b/pandas/_testing/_hypothesis.py
@@ -0,0 +1,85 @@
+"""
+Hypothesis data generator helpers.
+"""
+from datetime import datetime
+
+from hypothesis import strategies as st
+from hypothesis.extra.dateutil import timezones as dateutil_timezones
+from hypothesis.extra.pytz import timezones as pytz_timezones
+
+from pandas.compat import is_platform_windows
+
+import pandas as pd
+
+from pandas.tseries.offsets import (
+    BMonthBegin,
+    BMonthEnd,
+    BQuarterBegin,
+    BQuarterEnd,
+    BYearBegin,
+    BYearEnd,
+    MonthBegin,
+    MonthEnd,
+    QuarterBegin,
+    QuarterEnd,
+    YearBegin,
+    YearEnd,
+)
+
+OPTIONAL_INTS = st.lists(st.one_of(st.integers(), st.none()), max_size=10, min_size=3)
+
+OPTIONAL_FLOATS = st.lists(st.one_of(st.floats(), st.none()), max_size=10, min_size=3)
+
+OPTIONAL_TEXT = st.lists(st.one_of(st.none(), st.text()), max_size=10, min_size=3)
+
+OPTIONAL_DICTS = st.lists(
+    st.one_of(st.none(), st.dictionaries(st.text(), st.integers())),
+    max_size=10,
+    min_size=3,
+)
+
+OPTIONAL_LISTS = st.lists(
+    st.one_of(st.none(), st.lists(st.text(), max_size=10, min_size=3)),
+    max_size=10,
+    min_size=3,
+)
+
+if is_platform_windows():
+    DATETIME_NO_TZ = st.datetimes(min_value=datetime(1900, 1, 1))
+else:
+    DATETIME_NO_TZ = st.datetimes()
+
+DATETIME_JAN_1_1900_OPTIONAL_TZ = st.datetimes(
+    min_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),
+    max_value=pd.Timestamp(1900, 1, 1).to_pydatetime(),
+    timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
+)
+
+DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ = st.datetimes(
+    min_value=pd.Timestamp.min.to_pydatetime(warn=False),
+    max_value=pd.Timestamp.max.to_pydatetime(warn=False),
+)
+
+INT_NEG_999_TO_POS_999 = st.integers(-999, 999)
+
+# The strategy for each type is registered in conftest.py, as they don't carry
+# enough runtime information (e.g. type hints) to infer how to build them.
+YQM_OFFSET = st.one_of(
+    *map(
+        st.from_type,
+        [
+            MonthBegin,
+            MonthEnd,
+            BMonthBegin,
+            BMonthEnd,
+            QuarterBegin,
+            QuarterEnd,
+            BQuarterBegin,
+            BQuarterEnd,
+            YearBegin,
+            YearEnd,
+            BYearBegin,
+            BYearEnd,
+        ],
+    )
+)
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -1,5 +1,9 @@
 from datetime import datetime
 
+from hypothesis import (
+    given,
+    strategies as st,
+)
 import numpy as np
 import pytest
 
@@ -16,6 +20,13 @@
     isna,
 )
 import pandas._testing as tm
+from pandas._testing._hypothesis import (
+    OPTIONAL_DICTS,
+    OPTIONAL_FLOATS,
+    OPTIONAL_INTS,
+    OPTIONAL_LISTS,
+    OPTIONAL_TEXT,
+)
 
 
 @pytest.fixture(params=["default", "float_string", "mixed_float", "mixed_int"])
@@ -797,3 +808,16 @@ def test_where_columns_casting():
     result = df.where(pd.notnull(df), None)
     # make sure dtypes don't change
     tm.assert_frame_equal(expected, result)
+
+
+@given(
+    data=st.one_of(
+        OPTIONAL_DICTS, OPTIONAL_FLOATS, OPTIONAL_INTS, OPTIONAL_LISTS, OPTIONAL_TEXT
+    )
+)
+def test_where_inplace_casting(data):
+    # GH 22051
+    df = DataFrame({"a": data})
+    df_copy = df.where(pd.notnull(df), None).copy()
+    df.where(pd.notnull(df), None, inplace=True)
+    tm.assert_equal(df, df_copy)
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -14,18 +14,14 @@
 from hypothesis import (
     given,
     settings,
-    strategies as st,
 )
 import numpy as np
 import pytest
 import pytz
 
 from pandas._libs.tslibs import parsing
 from pandas._libs.tslibs.parsing import parse_datetime_string
-from pandas.compat import (
-    is_platform_windows,
-    np_array_datetime64_compat,
-)
+from pandas.compat import np_array_datetime64_compat
 from pandas.compat.pyarrow import pa_version_under6p0
 
 import pandas as pd
@@ -38,6 +34,7 @@
     Timestamp,
 )
 import pandas._testing as tm
+from pandas._testing._hypothesis import DATETIME_NO_TZ
 from pandas.core.indexes.datetimes import date_range
 
 import pandas.io.date_converters as conv
@@ -52,12 +49,6 @@
 # constant
 _DEFAULT_DATETIME = datetime(1, 1, 1)
 
-# Strategy for hypothesis
-if is_platform_windows():
-    date_strategy = st.datetimes(min_value=datetime(1900, 1, 1))
-else:
-    date_strategy = st.datetimes()
-
 
 @xfail_pyarrow
 def test_read_csv_with_custom_date_parser(all_parsers):
@@ -1683,7 +1674,7 @@ def _helper_hypothesis_delimited_date(call, date_string, **kwargs):
 
 
 @skip_pyarrow
-@given(date_strategy)
+@given(DATETIME_NO_TZ)
 @settings(deadline=None)
 @pytest.mark.parametrize("delimiter", list(" -./"))
 @pytest.mark.parametrize("dayfirst", [True, False])
diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py
@@ -7,92 +7,26 @@
 You may wish to consult the previous version for inspiration on further
 tests, or when trying to pin down the bugs exposed by the tests below.
 """
-import warnings
-
 from hypothesis import (
     assume,
     given,
-    strategies as st,
 )
 from hypothesis.errors import Flaky
-from hypothesis.extra.dateutil import timezones as dateutil_timezones
-from hypothesis.extra.pytz import timezones as pytz_timezones
 import pytest
 import pytz
 
 import pandas as pd
-from pandas import Timestamp
-
-from pandas.tseries.offsets import (
-    BMonthBegin,
-    BMonthEnd,
-    BQuarterBegin,
-    BQuarterEnd,
-    BYearBegin,
-    BYearEnd,
-    MonthBegin,
-    MonthEnd,
-    QuarterBegin,
-    QuarterEnd,
-    YearBegin,
-    YearEnd,
-)
-
-# ----------------------------------------------------------------
-# Helpers for generating random data
-
-with warnings.catch_warnings():
-    warnings.simplefilter("ignore")
-    min_dt = Timestamp(1900, 1, 1).to_pydatetime()
-    max_dt = Timestamp(1900, 1, 1).to_pydatetime()
-
-gen_date_range = st.builds(
-    pd.date_range,
-    start=st.datetimes(
-        # TODO: Choose the min/max values more systematically
-        min_value=Timestamp(1900, 1, 1).to_pydatetime(),
-        max_value=Timestamp(2100, 1, 1).to_pydatetime(),
-    ),
-    periods=st.integers(min_value=2, max_value=100),
-    freq=st.sampled_from("Y Q M D H T s ms us ns".split()),
-    tz=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
+from pandas._testing._hypothesis import (
+    DATETIME_JAN_1_1900_OPTIONAL_TZ,
+    YQM_OFFSET,
 )
 
-gen_random_datetime = st.datetimes(
-    min_value=min_dt,
-    max_value=max_dt,
-    timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()),
-)
-
-# The strategy for each type is registered in conftest.py, as they don't carry
-# enough runtime information (e.g. type hints) to infer how to build them.
-gen_yqm_offset = st.one_of(
-    *map(
-        st.from_type,
-        [
-            MonthBegin,
-            MonthEnd,
-            BMonthBegin,
-            BMonthEnd,
-            QuarterBegin,
-            QuarterEnd,
-            BQuarterBegin,
-            BQuarterEnd,
-            YearBegin,
-            YearEnd,
-            BYearBegin,
-            BYearEnd,
-        ],
-    )
-)
-
-
 # ----------------------------------------------------------------
 # Offset-specific behaviour tests
 
 
 @pytest.mark.arm_slow
-@given(gen_random_datetime, gen_yqm_offset)
+@given(DATETIME_JAN_1_1900_OPTIONAL_TZ, YQM_OFFSET)
 def test_on_offset_implementations(dt, offset):
     assume(not offset.normalize)
     # check that the class-specific implementations of is_on_offset match
@@ -112,7 +46,7 @@ def test_on_offset_implementations(dt, offset):
 
 
 @pytest.mark.xfail(strict=False, raises=Flaky, reason="unreliable test timings")
-@given(gen_yqm_offset)
+@given(YQM_OFFSET)
 def test_shift_across_dst(offset):
     # GH#18319 check that 1) timezone is correctly normalized and
     # 2) that hour is not incorrectly changed by this normalization
diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py
@@ -11,7 +11,6 @@
     example,
     given,
     settings,
-    strategies as st,
 )
 import numpy as np
 import pytest
@@ -23,6 +22,7 @@
     Timestamp,
 )
 import pandas._testing as tm
+from pandas._testing._hypothesis import INT_NEG_999_TO_POS_999
 from pandas.tests.tseries.offsets.common import assert_offset_equal
 
 from pandas.tseries import offsets
@@ -66,7 +66,7 @@ def test_delta_to_tick():
 @example(n=2, m=3)
 @example(n=800, m=300)
 @example(n=1000, m=5)
-@given(n=st.integers(-999, 999), m=st.integers(-999, 999))
+@given(n=INT_NEG_999_TO_POS_999, m=INT_NEG_999_TO_POS_999)
 def test_tick_add_sub(cls, n, m):
     # For all Tick subclasses and all integers n, m, we should have
     # tick(n) + tick(m) == tick(n+m)
@@ -86,7 +86,7 @@ def test_tick_add_sub(cls, n, m):
 @pytest.mark.parametrize("cls", tick_classes)
 @settings(deadline=None)
 @example(n=2, m=3)
-@given(n=st.integers(-999, 999), m=st.integers(-999, 999))
+@given(n=INT_NEG_999_TO_POS_999, m=INT_NEG_999_TO_POS_999)
 def test_tick_equality(cls, n, m):
     assume(m != n)
     # tick == tock iff tick.n == tock.n
diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py
@@ -3,16 +3,13 @@
     datetime,
 )
 
-from hypothesis import (
-    given,
-    strategies as st,
-)
+from hypothesis import given
 import numpy as np
 import pytest
 
 from pandas._libs.tslibs import ccalendar
 
-import pandas as pd
+from pandas._testing._hypothesis import DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ
 
 
 @pytest.mark.parametrize(
@@ -59,12 +56,7 @@ def test_dt_correct_iso_8601_year_week_and_day(input_date_tuple, expected_iso_tu
     assert result == expected_iso_tuple
 
 
-@given(
-    st.datetimes(
-        min_value=pd.Timestamp.min.to_pydatetime(warn=False),
-        max_value=pd.Timestamp.max.to_pydatetime(warn=False),
-    )
-)
+@given(DATETIME_IN_PD_TIMESTAMP_RANGE_NO_TZ)
 def test_isocalendar(dt):
     expected = dt.isocalendar()
     result = ccalendar.get_iso_calendar(dt.year, dt.month, dt.day)