diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index be48552fb04e9..db39d3616ca60 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -248,13 +248,26 @@ Assignment to multiple columns of a :class:`DataFrame` when some of the columns Deprecations ~~~~~~~~~~~~ + - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) + - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) - Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`) - :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`) - The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`) +- Passing any arguments but the first one to :func:`read_html` as + positional arguments is deprecated since version 1.1. All other + arguments should be given as keyword arguments (:issue:`27573`). + +- Passing any arguments but ``path_or_buf`` (the first one) to + :func:`read_json` as positional arguments is deprecated since + version 1.1. All other arguments should be given as keyword + arguments (:issue:`27573`). + +- + .. 
--------------------------------------------------------------------------- diff --git a/pandas/io/html.py b/pandas/io/html.py index ce6674ffb9588..2d48b40200fa6 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -11,6 +11,7 @@ from pandas.compat._optional import import_optional_dependency from pandas.errors import AbstractMethodError, EmptyDataError +from pandas.util._decorators import deprecate_nonkeyword_arguments from pandas.core.dtypes.common import is_list_like @@ -921,6 +922,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): return ret +@deprecate_nonkeyword_arguments(version="2.0") def read_html( io, match=".+", diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index d6b90ae99973e..3b1164a51ba91 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -11,7 +11,7 @@ from pandas._libs.tslibs import iNaT from pandas._typing import JSONSerializable from pandas.errors import AbstractMethodError -from pandas.util._decorators import deprecate_kwarg +from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments from pandas.core.dtypes.common import ensure_str, is_period_dtype @@ -345,6 +345,9 @@ def _write( @deprecate_kwarg(old_arg_name="numpy", new_arg_name=None) +@deprecate_nonkeyword_arguments( + version="2.0", allowed_args=["path_or_buf"], stacklevel=3 +) def read_json( path_or_buf=None, orient=None, diff --git a/pandas/tests/io/json/test_deprecated_kwargs.py b/pandas/tests/io/json/test_deprecated_kwargs.py new file mode 100644 index 0000000000000..79245bc9d34a8 --- /dev/null +++ b/pandas/tests/io/json/test_deprecated_kwargs.py @@ -0,0 +1,31 @@ +""" +Tests for the deprecated keyword arguments for `read_json`. 
+""" + +import pandas as pd +import pandas._testing as tm + +from pandas.io.json import read_json + + +def test_deprecated_kwargs(): + df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) + buf = df.to_json(orient="split") + with tm.assert_produces_warning(FutureWarning): + tm.assert_frame_equal(df, read_json(buf, "split")) + buf = df.to_json(orient="columns") + with tm.assert_produces_warning(FutureWarning): + tm.assert_frame_equal(df, read_json(buf, "columns")) + buf = df.to_json(orient="index") + with tm.assert_produces_warning(FutureWarning): + tm.assert_frame_equal(df, read_json(buf, "index")) + + +def test_good_kwargs(): + df = pd.DataFrame({"A": [2, 4, 6], "B": [3, 6, 9]}, index=[0, 1, 2]) + with tm.assert_produces_warning(None): + tm.assert_frame_equal(df, read_json(df.to_json(orient="split"), orient="split")) + tm.assert_frame_equal( + df, read_json(df.to_json(orient="columns"), orient="columns") + ) + tm.assert_frame_equal(df, read_json(df.to_json(orient="index"), orient="index")) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index cbaf16d048eda..3d73e983402a7 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -72,7 +72,7 @@ def test_invalid_flavor(): msg = r"\{" + flavor + r"\} is not a valid set of flavors" with pytest.raises(ValueError, match=msg): - read_html(url, "google", flavor=flavor) + read_html(url, match="google", flavor=flavor) @td.skip_if_no("bs4") @@ -121,13 +121,26 @@ def test_to_html_compat(self): res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0] tm.assert_frame_equal(res, df) + @tm.network + def test_banklist_url_positional_match(self): + url = "http://www.fdic.gov/bank/individual/failed/banklist.html" + # Passing match argument as positional should cause a FutureWarning. 
+ with tm.assert_produces_warning(FutureWarning): + df1 = self.read_html( + url, "First Federal Bank of Florida", attrs={"id": "table"} + ) + with tm.assert_produces_warning(FutureWarning): + df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"}) + + assert_framelist_equal(df1, df2) + @tm.network def test_banklist_url(self): url = "http://www.fdic.gov/bank/individual/failed/banklist.html" df1 = self.read_html( - url, "First Federal Bank of Florida", attrs={"id": "table"} + url, match="First Federal Bank of Florida", attrs={"id": "table"} ) - df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"}) + df2 = self.read_html(url, match="Metcalf Bank", attrs={"id": "table"}) assert_framelist_equal(df1, df2) @@ -137,21 +150,25 @@ def test_spam_url(self): "https://raw.githubusercontent.com/pandas-dev/pandas/master/" "pandas/tests/io/data/html/spam.html" ) - df1 = self.read_html(url, ".*Water.*") - df2 = self.read_html(url, "Unit") + df1 = self.read_html(url, match=".*Water.*") + df2 = self.read_html(url, match="Unit") assert_framelist_equal(df1, df2) @pytest.mark.slow def test_banklist(self): - df1 = self.read_html(self.banklist_data, ".*Florida.*", attrs={"id": "table"}) - df2 = self.read_html(self.banklist_data, "Metcalf Bank", attrs={"id": "table"}) + df1 = self.read_html( + self.banklist_data, match=".*Florida.*", attrs={"id": "table"} + ) + df2 = self.read_html( + self.banklist_data, match="Metcalf Bank", attrs={"id": "table"} + ) assert_framelist_equal(df1, df2) def test_spam(self): - df1 = self.read_html(self.spam_data, ".*Water.*") - df2 = self.read_html(self.spam_data, "Unit") + df1 = self.read_html(self.spam_data, match=".*Water.*") + df2 = self.read_html(self.spam_data, match="Unit") assert_framelist_equal(df1, df2) assert df1[0].iloc[0, 0] == "Proximates" @@ -168,81 +185,82 @@ def test_banklist_no_match(self): assert isinstance(df, DataFrame) def test_spam_header(self): - df = self.read_html(self.spam_data, ".*Water.*", header=2)[0] + df = 
self.read_html(self.spam_data, match=".*Water.*", header=2)[0] assert df.columns[0] == "Proximates" assert not df.empty def test_skiprows_int(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=1) - df2 = self.read_html(self.spam_data, "Unit", skiprows=1) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=1) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=1) assert_framelist_equal(df1, df2) def test_skiprows_range(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=range(2))[0] - df2 = self.read_html(self.spam_data, "Unit", skiprows=range(2))[0] - tm.assert_frame_equal(df1, df2) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=range(2)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=range(2)) + + assert_framelist_equal(df1, df2) def test_skiprows_list(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=[1, 2]) - df2 = self.read_html(self.spam_data, "Unit", skiprows=[2, 1]) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=[1, 2]) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=[2, 1]) assert_framelist_equal(df1, df2) def test_skiprows_set(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows={1, 2}) - df2 = self.read_html(self.spam_data, "Unit", skiprows={2, 1}) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows={1, 2}) + df2 = self.read_html(self.spam_data, match="Unit", skiprows={2, 1}) assert_framelist_equal(df1, df2) def test_skiprows_slice(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=1) - df2 = self.read_html(self.spam_data, "Unit", skiprows=1) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=1) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=1) assert_framelist_equal(df1, df2) def test_skiprows_slice_short(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=slice(2)) - df2 = self.read_html(self.spam_data, "Unit", 
skiprows=slice(2)) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=slice(2)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=slice(2)) assert_framelist_equal(df1, df2) def test_skiprows_slice_long(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=slice(2, 5)) - df2 = self.read_html(self.spam_data, "Unit", skiprows=slice(4, 1, -1)) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=slice(2, 5)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=slice(4, 1, -1)) assert_framelist_equal(df1, df2) def test_skiprows_ndarray(self): - df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=np.arange(2)) - df2 = self.read_html(self.spam_data, "Unit", skiprows=np.arange(2)) + df1 = self.read_html(self.spam_data, match=".*Water.*", skiprows=np.arange(2)) + df2 = self.read_html(self.spam_data, match="Unit", skiprows=np.arange(2)) assert_framelist_equal(df1, df2) def test_skiprows_invalid(self): with pytest.raises(TypeError, match=("is not a valid type for skipping rows")): - self.read_html(self.spam_data, ".*Water.*", skiprows="asdf") + self.read_html(self.spam_data, match=".*Water.*", skiprows="asdf") def test_index(self): - df1 = self.read_html(self.spam_data, ".*Water.*", index_col=0) - df2 = self.read_html(self.spam_data, "Unit", index_col=0) + df1 = self.read_html(self.spam_data, match=".*Water.*", index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", index_col=0) assert_framelist_equal(df1, df2) def test_header_and_index_no_types(self): - df1 = self.read_html(self.spam_data, ".*Water.*", header=1, index_col=0) - df2 = self.read_html(self.spam_data, "Unit", header=1, index_col=0) + df1 = self.read_html(self.spam_data, match=".*Water.*", header=1, index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", header=1, index_col=0) assert_framelist_equal(df1, df2) def test_header_and_index_with_types(self): - df1 = self.read_html(self.spam_data, ".*Water.*", header=1, 
index_col=0) - df2 = self.read_html(self.spam_data, "Unit", header=1, index_col=0) + df1 = self.read_html(self.spam_data, match=".*Water.*", header=1, index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", header=1, index_col=0) assert_framelist_equal(df1, df2) def test_infer_types(self): # 10892 infer_types removed - df1 = self.read_html(self.spam_data, ".*Water.*", index_col=0) - df2 = self.read_html(self.spam_data, "Unit", index_col=0) + df1 = self.read_html(self.spam_data, match=".*Water.*", index_col=0) + df2 = self.read_html(self.spam_data, match="Unit", index_col=0) assert_framelist_equal(df1, df2) def test_string_io(self): @@ -252,25 +270,25 @@ def test_string_io(self): with open(self.spam_data, **self.spam_data_kwargs) as f: data2 = StringIO(f.read()) - df1 = self.read_html(data1, ".*Water.*") - df2 = self.read_html(data2, "Unit") + df1 = self.read_html(data1, match=".*Water.*") + df2 = self.read_html(data2, match="Unit") assert_framelist_equal(df1, df2) def test_string(self): with open(self.spam_data, **self.spam_data_kwargs) as f: data = f.read() - df1 = self.read_html(data, ".*Water.*") - df2 = self.read_html(data, "Unit") + df1 = self.read_html(data, match=".*Water.*") + df2 = self.read_html(data, match="Unit") assert_framelist_equal(df1, df2) def test_file_like(self): with open(self.spam_data, **self.spam_data_kwargs) as f: - df1 = self.read_html(f, ".*Water.*") + df1 = self.read_html(f, match=".*Water.*") with open(self.spam_data, **self.spam_data_kwargs) as f: - df2 = self.read_html(f, "Unit") + df2 = self.read_html(f, match="Unit") assert_framelist_equal(df1, df2) @@ -292,7 +310,7 @@ def test_invalid_url(self): def test_file_url(self): url = self.banklist_data dfs = self.read_html( - file_path_to_url(os.path.abspath(url)), "First", attrs={"id": "table"} + file_path_to_url(os.path.abspath(url)), match="First", attrs={"id": "table"} ) assert isinstance(dfs, list) for df in dfs: @@ -308,7 +326,7 @@ def test_invalid_table_attrs(self): def 
_bank_data(self, *args, **kwargs): return self.read_html( - self.banklist_data, "Metcalf", attrs={"id": "table"}, *args, **kwargs + self.banklist_data, match="Metcalf", attrs={"id": "table"}, *args, **kwargs ) @pytest.mark.slow @@ -358,7 +376,7 @@ def test_regex_idempotency(self): def test_negative_skiprows(self): msg = r"\(you passed a negative value\)" with pytest.raises(ValueError, match=msg): - self.read_html(self.spam_data, "Water", skiprows=-1) + self.read_html(self.spam_data, match="Water", skiprows=-1) @tm.network def test_multiple_matches(self): @@ -600,7 +618,9 @@ def test_gold_canyon(self): raw_text = f.read() assert gc in raw_text - df = self.read_html(self.banklist_data, "Gold Canyon", attrs={"id": "table"})[0] + df = self.read_html( + self.banklist_data, match="Gold Canyon", attrs={"id": "table"} + )[0] assert gc in df.to_string() def test_different_number_of_cols(self): @@ -855,7 +875,7 @@ def test_wikipedia_states_table(self, datapath): data = datapath("io", "data", "html", "wikipedia_states.html") assert os.path.isfile(data), f"{repr(data)} is not a file" assert os.path.getsize(data), f"{repr(data)} is an empty file" - result = self.read_html(data, "Arizona", header=1)[0] + result = self.read_html(data, match="Arizona", header=1)[0] assert result.shape == (60, 12) assert "Unnamed" in result.columns[-1] assert result["sq mi"].dtype == np.dtype("float64") @@ -1065,7 +1085,7 @@ def test_works_on_valid_markup(self, datapath): @pytest.mark.slow def test_fallback_success(self, datapath): banklist_data = datapath("io", "data", "html", "banklist.html") - self.read_html(banklist_data, ".*Water.*", flavor=["lxml", "html5lib"]) + self.read_html(banklist_data, match=".*Water.*", flavor=["lxml", "html5lib"]) def test_to_html_timestamp(self): rng = date_range("2000-01-01", periods=10) diff --git a/pandas/tests/util/test_deprecate_nonkeyword_arguments.py b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py new file mode 100644 index 
0000000000000..05bc617232bdd --- /dev/null +++ b/pandas/tests/util/test_deprecate_nonkeyword_arguments.py @@ -0,0 +1,101 @@ +""" +Tests for the `deprecate_nonkeyword_arguments` decorator +""" + +import warnings + +from pandas.util._decorators import deprecate_nonkeyword_arguments + +import pandas._testing as tm + + +@deprecate_nonkeyword_arguments(version="1.1", allowed_args=["a", "b"]) +def f(a, b=0, c=0, d=0): + return a + b + c + d + + +def test_one_argument(): + with tm.assert_produces_warning(None): + assert f(19) == 19 + + +def test_one_and_one_arguments(): + with tm.assert_produces_warning(None): + assert f(19, d=6) == 25 + + +def test_two_arguments(): + with tm.assert_produces_warning(None): + assert f(1, 5) == 6 + + +def test_two_and_two_arguments(): + with tm.assert_produces_warning(None): + assert f(1, 3, c=3, d=5) == 12 + + +def test_three_arguments(): + with tm.assert_produces_warning(FutureWarning): + assert f(6, 3, 3) == 12 + + +def test_four_arguments(): + with tm.assert_produces_warning(FutureWarning): + assert f(1, 2, 3, 4) == 10 + + +@deprecate_nonkeyword_arguments(version="1.1") +def g(a, b=0, c=0, d=0): + with tm.assert_produces_warning(None): + return a + b + c + d + + +def test_one_and_three_arguments_default_allowed_args(): + with tm.assert_produces_warning(None): + assert g(1, b=3, c=3, d=5) == 12 + + +def test_three_arguments_default_allowed_args(): + with tm.assert_produces_warning(FutureWarning): + assert g(6, 3, 3) == 12 + + +def test_three_positional_argument_with_warning_message_analysis(): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert g(6, 3, 3) == 12 + assert len(w) == 1 + for actual_warning in w: + assert actual_warning.category == FutureWarning + assert str(actual_warning.message) == ( + "Starting with Pandas version 1.1 all arguments of g " + "except for the argument 'a' will be keyword-only" + ) + + +@deprecate_nonkeyword_arguments(version="1.1") +def h(a=0, b=0, c=0, d=0): + return 
def _format_argument_list(allow_args: Union[List[str], int]) -> str:
    """
    Describe the arguments that may stay positional, for the warning message.

    Convert the ``allow_args`` value (either a list of argument names or an
    integer count) used by `deprecate_nonkeyword_arguments` into the text
    fragment that is appended to the function name in the warning message.

    Parameters
    ----------
    allow_args : list, tuple or int
        Names of the leading arguments that stay positional, or the number
        of such arguments. ``None`` is not allowed here.

    Returns
    -------
    str
        Empty string when no argument stays positional; otherwise a
        fragment that starts with a single space so it can be concatenated
        directly after the function name.

    Examples
    --------
    `_format_argument_list(0)` -> ''
    `_format_argument_list(1)` -> ' except for the first argument'
    `_format_argument_list(2)` -> ' except for the first 2 arguments'
    `_format_argument_list([])` -> ''
    `_format_argument_list(['a'])` -> " except for the argument 'a'"
    `_format_argument_list(['a', 'b'])` -> " except for the arguments 'a' and 'b'"
    `_format_argument_list(['a', 'b', 'c'])` ->
        " except for the arguments 'a', 'b' and 'c'"
    """
    if not allow_args:
        # 0 and an empty list/tuple both mean "nothing stays positional".
        return ""
    if allow_args == 1:
        return " except for the first argument"
    if isinstance(allow_args, int):
        return f" except for the first {allow_args} arguments"
    if len(allow_args) == 1:
        return f" except for the argument '{allow_args[0]}'"
    # Two or more names: comma-separate all but the last, join the last
    # one with "and".
    leading = ", ".join(f"'{name}'" for name in allow_args[:-1])
    return f" except for the arguments {leading} and '{allow_args[-1]}'"


def deprecate_nonkeyword_arguments(
    version: str,
    allowed_args: Optional[Union[List[str], int]] = None,
    stacklevel: int = 2,
) -> Callable:
    """
    Decorator to deprecate a use of non-keyword arguments of a function.

    The decorated function keeps working unchanged; a ``FutureWarning`` is
    emitted whenever it is called with more positional arguments than
    allowed.

    Parameters
    ----------
    version : str
        The version in which positional arguments will become
        keyword-only.

    allowed_args : list or int, optional
        In case of list, it must be the list of names of some
        first arguments of the decorated functions that are
        OK to be given as positional arguments. In case of an
        integer, this is the number of positional arguments
        that will stay positional. In case of None value,
        defaults to list of all arguments not having the
        default value.

    stacklevel : int, default=2
        The stack level for warnings.warn

    Returns
    -------
    callable
        A decorator that wraps a function with the positional-argument
        check described above.
    """

    def decorate(func):
        if allowed_args is not None:
            allow_args = allowed_args
        else:
            spec = inspect.getfullargspec(func)
            # ``spec.defaults`` is None (not an empty tuple) when no
            # parameter has a default value; in that case every named
            # positional parameter stays positional.
            num_defaults = len(spec.defaults) if spec.defaults else 0
            allow_args = spec.args[: len(spec.args) - num_defaults]

        if isinstance(allow_args, (list, tuple)):
            num_allow_args = len(allow_args)
        else:
            num_allow_args = allow_args

        # The message depends only on the decoration arguments, so build it
        # once here instead of on every call of the wrapped function.
        msg = (
            f"Starting with Pandas version {version} all arguments of "
            f"{func.__name__}{_format_argument_list(allow_args)} "
            "will be keyword-only"
        )

        @wraps(func)
        def wrapper(*args, **kwargs):
            if len(args) > num_allow_args:
                warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
            return func(*args, **kwargs)

        return wrapper

    return decorate