diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c0e03b48..98935c05 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -117,10 +117,6 @@ repos: - id: check-manifest args: [--no-build-isolation] additional_dependencies: [setuptools-scm, toml] -- repo: https://github.com/guilatrova/tryceratops - rev: v1.0.1 - hooks: - - id: tryceratops - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v0.942' hooks: diff --git a/docs/source/_static/images/warning.svg b/docs/source/_static/images/warning.svg new file mode 100644 index 00000000..97ccbb95 --- /dev/null +++ b/docs/source/_static/images/warning.svg @@ -0,0 +1,146 @@ + + + + +
+
+
+ + + + + +
pytask
+
+
+
──────────────────────────────────────── Start pytask session ────────────────────────────────────────
+
Platform: win32 -- Python 3.10.0, pytask 0.2.1, pluggy 1.0.0
+
Root: C:\Users\pytask-dev\git\pytask-examples
+
Collected 1 task.
+
+
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
+
Task Outcome
+
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
+
task_warning.py::task_warning.
+
└───────────────────────────────┴─────────┘
+
+
──────────────────────────────────────────────────────────────────────────────────────────────────────
+
╭─────────── Summary ────────────╮
+
1 Collected tasks
+
1 Succeeded (100.0%)
+
╰────────────────────────────────╯
+
───────────────────────────────────── Succeeded in 0.24 seconds ──────────────────────────────────────
+
╭───────────────────────────────────────────── Warnings ─────────────────────────────────────────────╮
+
│ task_warning.py::task_warning │
+
│ C:\Users\pytask-dev\git\pytask-examples\task_warning.py:8: SettingWithCopyWarning: │
+
│ A value is trying to be set on a copy of a slice from a DataFrame. │
+
│ Try using .loc = value instead │
+
│ │
+
│ See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/ │
+
│ indexing.html#returning-a-view-versus-a-copy │
+
│ df[df["a"] < 5]["b"] = 1 │
+
│ │
+
https://pytask-dev.rtfd.io/en/stable/how_to_guides/capture_warnings.html │
+
╰────────────────────────────────────────────────────────────────────────────────────────────────────╯
+
+
+
+
+ +
+
diff --git a/docs/source/_static/images/write-a-task.svg b/docs/source/_static/images/write-a-task.svg index fce5385a..7554a703 100644 --- a/docs/source/_static/images/write-a-task.svg +++ b/docs/source/_static/images/write-a-task.svg @@ -116,7 +116,7 @@
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
Task Outcome
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
-
task_data_preparation.py::task_create_random_data.
+
task_data_preparation.py::task_create_random_data.
└───────────────────────────────────────────────────┴─────────┘
──────────────────────────────────────────────────────────────────────────────────────────────────────
diff --git a/docs/source/changes.md b/docs/source/changes.md index 4fd63b87..d26da458 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -7,8 +7,10 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and ## 0.2.1 - 2022-xx-xx -- {pull}`261` adds a config file option to sort entries in live table - {pull}`259` adds an `.svg` for profiling tasks. +- {pull}`261` adds a config file option to sort entries in live table +- {pull}`262` allows pytask to capture warnings. Here is the + [guide](https://pytask-dev.readthedocs.io/en/stable/how_to_guides/capture_warnings.html). ## 0.2.0 - 2022-04-14 diff --git a/docs/source/how_to_guides/capture_warnings.md b/docs/source/how_to_guides/capture_warnings.md new file mode 100644 index 00000000..b012cce4 --- /dev/null +++ b/docs/source/how_to_guides/capture_warnings.md @@ -0,0 +1,130 @@ +# Capture warnings + +pytask captures warnings during the execution. + +Here is an example with the most infamous warning in the world of scientific Python. + +```python +import pandas as pd +import pytask + + +def _create_df(): + df = pd.DataFrame({"a": range(10), "b": range(10, 20)}) + df[df["a"] < 5]["b"] = 1 + return df + + +@pytask.mark.produces("df.pkl") +def task_warning(produces): + df = _create_df() + df.to_pickle(produces) +``` + +Running pytask produces + +```{image} /_static/images/warning.svg +``` + +## Controlling warnings + +You can use the `filterwarnings` option in `pyproject.toml` to configure pytask's +behavior towards warnings. For example, the configuration below will ignore all user warnings +and specific deprecation warnings matching a regex, but will transform all other +warnings into errors.
+ +```toml +[tool.pytask.ini_options] +filterwarnings = [ + "error", + "ignore::UserWarning", + # note the use of single quote below to denote "raw" strings in TOML + 'ignore:function ham\(\) is deprecated:DeprecationWarning', +] +``` + +When a warning matches more than one option in the list, the action for the last +matching option is performed. + +## `@pytask.mark.filterwarnings` + +You can use the `@pytask.mark.filterwarnings` to add warning filters to specific tasks, +allowing you to have finer control of which warnings should be captured for each +task: + +```python +import pandas as pd +import pytask + + +def _create_df(): + df = pd.DataFrame({"a": range(10), "b": range(10, 20)}) + df[df["a"] < 5]["b"] = 1 + return df + + +@pytask.mark.filterwarnings("ignore:.*:SettingWithCopyWarning") +@pytask.mark.produces("df.pkl") +def task_warning(produces): + df = _create_df() + df.to_pickle(produces) +``` + +Filters applied using a mark take precedence over filters passed on the command line or +configured by the `filterwarnings` configuration option. + +## Disabling warnings summary + +Although not recommended, you can use the `--disable-warnings` command-line option to +suppress the warning summary entirely from pytask's output. + +## `DeprecationWarning` and `PendingDeprecationWarning` + +By default pytask will display `DeprecationWarning` and `PendingDeprecationWarning` +warnings from user code and third-party libraries. This helps users keep their code +modern and avoid breakages when deprecated warnings are effectively removed. + +Sometimes it is useful to hide some specific deprecation warnings that happen in code +that you have no control over (such as third-party libraries), in which case you might +use the warning filters options (configuration file or marks) to ignore those warnings.
+ +For example: + +```toml +[tool.pytask.ini_options] +filterwarnings = [ + "ignore:.*U.*mode is deprecated:DeprecationWarning" +] +``` + +This will ignore all warnings of type `DeprecationWarning` where the start of the +message matches the regular expression `".*U.*mode is deprecated"`. + +## Debugging warnings + +Sometimes it is not clear which line of code triggered a warning. To find the location, +you can turn warnings into exceptions and then use the {option}`pytask build --pdb` flag +to enter the debugger. + +You can use the configuration to convert warnings to errors by setting + +```toml +[tool.pytask.ini_options] +filterwarnings = ["error:.*"] +``` + +and then run `pytask`. + +Or, you can use a temporary environment variable. Here is an example for bash + +```console +$ PYTHONWARNINGS=error pytask --pdb +``` + +and here for PowerShell + +```console +$ $env:PYTHONWARNINGS = 'error' +$ pytask --pdb +$ Remove-Item env:\PYTHONWARNINGS +``` diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md index 32b4be37..1a310b41 100644 --- a/docs/source/how_to_guides/index.md +++ b/docs/source/how_to_guides/index.md @@ -12,6 +12,7 @@ specific tasks with pytask.
maxdepth: 1 --- invoking_pytask_extended +capture_warnings repeating_tasks_with_different_inputs_the_pytest_way how_to_influence_build_order how_to_write_a_plugin diff --git a/scripts/svgs/task_warning.py b/scripts/svgs/task_warning.py new file mode 100644 index 00000000..3b38286b --- /dev/null +++ b/scripts/svgs/task_warning.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import pandas as pd +import pytask +from click.testing import CliRunner + + +def _create_df(): + df = pd.DataFrame({"a": range(10), "b": range(10, 20)}) + df[df["a"] < 5]["b"] = 1 + return df + + +@pytask.mark.produces("df.pkl") +def task_warning(produces): + df = _create_df() + df.to_pickle(produces) + + +if __name__ == "__main__": + runner = CliRunner() + + pytask.console.record = True + runner.invoke(pytask.cli, [__file__]) + pytask.console.save_svg("warning.svg", title="pytask") diff --git a/src/_pytask/cli.py b/src/_pytask/cli.py index 837c4009..9133fa49 100644 --- a/src/_pytask/cli.py +++ b/src/_pytask/cli.py @@ -68,6 +68,7 @@ def pytask_add_hooks(pm: pluggy.PluginManager) -> None: from _pytask import resolve_dependencies from _pytask import skipping from _pytask import task + from _pytask import warnings pm.register(build) pm.register(capture) @@ -89,6 +90,7 @@ def pytask_add_hooks(pm: pluggy.PluginManager) -> None: pm.register(resolve_dependencies) pm.register(skipping) pm.register(task) + pm.register(warnings) @click.group( diff --git a/src/_pytask/session.py b/src/_pytask/session.py index 0c0cd086..11d45f4c 100644 --- a/src/_pytask/session.py +++ b/src/_pytask/session.py @@ -10,7 +10,7 @@ import attr import networkx as nx from _pytask.outcomes import ExitCode - +from _pytask.warnings_utils import WarningReport # Location was moved from pluggy v0.13.1 to v1.0.0. 
try: @@ -62,6 +62,7 @@ class Session: scheduler = attr.ib(default=None, type=Any) should_stop = attr.ib(default=False, type=Optional[bool]) """Optional[bool]: Indicates whether the session should be stopped.""" + warnings = attr.ib(factory=list, type=List[WarningReport]) @classmethod def from_config(cls, config: dict[str, Any]) -> Session: diff --git a/src/_pytask/warnings.py b/src/_pytask/warnings.py new file mode 100644 index 00000000..523616c6 --- /dev/null +++ b/src/_pytask/warnings.py @@ -0,0 +1,280 @@ +"""This module contains code for capturing warnings.""" +from __future__ import annotations + +import functools +import re +import sys +import textwrap +import warnings +from collections import defaultdict +from contextlib import contextmanager +from typing import Any +from typing import cast +from typing import Dict +from typing import Generator +from typing import List + +import attr +import click +from _pytask.config import hookimpl +from _pytask.console import console +from _pytask.mark_utils import get_marks +from _pytask.nodes import Task +from _pytask.outcomes import Exit +from _pytask.session import Session +from _pytask.warnings_utils import WarningReport +from rich.console import Console +from rich.console import ConsoleOptions +from rich.console import RenderResult +from rich.padding import Padding +from rich.panel import Panel + + +@hookimpl +def pytask_extend_command_line_interface(cli: click.Group) -> None: + """Extend the cli.""" + cli.commands["build"].params.append( + click.Option( + ["--disable-warnings"], + is_flag=True, + default=False, + help="Disables the summary for warnings.", + ) + ) + + +@hookimpl +def pytask_parse_config( + config: dict[str, Any], + config_from_file: dict[str, Any], + config_from_cli: dict[str, Any], +) -> None: + """Parse the configuration.""" + config["disable_warnings"] = config_from_cli.get("disable_warnings", False) + config["filterwarnings"] = _parse_filterwarnings( + config_from_file.get("filterwarnings") + ) + 
config["markers"]["filterwarnings"] = "Add a filter for a warning to a task." + + +@hookimpl +def pytask_post_parse(config: dict[str, Any]) -> None: + """Activate the warnings plugin if not disabled.""" + if not config["disable_warnings"]: + config["pm"].register(WarningsNameSpace) + + +def _parse_filterwarnings(x: str | list[str] | None) -> list[str]: + """Parse filterwarnings.""" + if x is None: + return [] + elif isinstance(x, str): + return [i.strip() for i in x.split("\n")] + elif isinstance(x, list): + return [i.strip() for i in x] + else: + raise TypeError("'filterwarnings' must be a str, list[str] or None.") + + +@contextmanager +def catch_warnings_for_item( + session: Session, + task: Task | None = None, + when: str | None = None, +) -> Generator[None, None, None]: + """Context manager that catches warnings generated in the contained execution block. + ``item`` can be None if we are not in the context of an item execution. + Each warning captured triggers the ``pytest_warning_recorded`` hook. + """ + with warnings.catch_warnings(record=True) as log: + # mypy can't infer that record=True means log is not None; help it. + assert log is not None + + if not sys.warnoptions: + # If user is not explicitly configuring warning filters, show deprecation + # warnings by default (#2908). 
+ warnings.filterwarnings("always", category=DeprecationWarning) + warnings.filterwarnings("always", category=PendingDeprecationWarning) + + for arg in session.config["filterwarnings"]: + warnings.filterwarnings(*parse_warning_filter(arg, escape=False)) + + # apply filters from "filterwarnings" marks + if task is not None: + for mark in get_marks(task, "filterwarnings"): + for arg in mark.args: + warnings.filterwarnings(*parse_warning_filter(arg, escape=False)) + + yield + + if task is not None: + id_ = task.short_name + else: + id_ = when + + for warning_message in log: + fs_location = warning_message.filename, warning_message.lineno + session.warnings.append( + WarningReport( + message=warning_record_to_str(warning_message), + fs_location=fs_location, + id_=id_, + ) + ) + + +@functools.lru_cache(maxsize=50) +def parse_warning_filter( + arg: str, *, escape: bool +) -> tuple[warnings._ActionKind, str, type[Warning], str, int]: + """Parse a warnings filter string. + + This is copied from warnings._setoption with the following changes: + + - Does not apply the filter. + - Escaping is optional. + - Raises UsageError so we get nice error messages on failure. 
+ + """ + __tracebackhide__ = True + error_template = textwrap.dedent( + f"""\ + while parsing the following warning configuration: + {arg} + This error occurred: + {{error}} + """ + ) + + parts = arg.split(":") + if len(parts) > 5: + doc_url = ( + "https://docs.python.org/3/library/warnings.html#describing-warning-filters" + ) + error = textwrap.dedent( + f"""\ + Too many fields ({len(parts)}), expected at most 5 separated by colons: + action:message:category:module:line + For more information please consult: {doc_url} + """ + ) + raise Exit(error_template.format(error=error)) + + while len(parts) < 5: + parts.append("") + action_, message, category_, module, lineno_ = (s.strip() for s in parts) + try: + action: warnings._ActionKind = warnings._getaction(action_) # type: ignore + except warnings._OptionError as e: + raise Exit(error_template.format(error=str(e))) + try: + category: type[Warning] = _resolve_warning_category(category_) + except Exit as e: + raise Exit(str(e)) + if message and escape: + message = re.escape(message) + if module and escape: + module = re.escape(module) + r"\Z" + if lineno_: + try: + lineno = int(lineno_) + if lineno < 0: + raise ValueError("number is negative") + except ValueError as e: + raise Exit(error_template.format(error=f"invalid lineno {lineno_!r}: {e}")) + else: + lineno = 0 + return action, message, category, module, lineno + + +def _resolve_warning_category(category: str) -> type[Warning]: + """ + Copied from warnings._getcategory, but changed so it lets exceptions (specially + ImportErrors) propagate so we can get access to their tracebacks (#9218). + + """ + __tracebackhide__ = True + if not category: + return Warning + + if "." 
not in category: + import builtins as m + + klass = category + else: + module, _, klass = category.rpartition(".") + m = __import__(module, None, None, [klass]) + cat = getattr(m, klass) + if not issubclass(cat, Warning): + raise Exception(f"{cat} is not a Warning subclass") + return cast(type[Warning], cat) + + +def warning_record_to_str(warning_message: warnings.WarningMessage) -> str: + """Convert a warnings.WarningMessage to a string.""" + msg = warnings.formatwarning( + message=warning_message.message, + category=warning_message.category, + filename=warning_message.filename, + lineno=warning_message.lineno, + line=warning_message.line, + ) + return msg + + +class WarningsNameSpace: + """A namespace for the warnings plugin.""" + + @staticmethod + @hookimpl(hookwrapper=True) + def pytask_collect(session: Session) -> Generator[None, None, None]: + """Catch warnings while executing a task.""" + with catch_warnings_for_item(session=session): + yield + + @staticmethod + @hookimpl(hookwrapper=True) + def pytask_execute_task( + session: Session, task: Task + ) -> Generator[None, None, None]: + """Catch warnings while executing a task.""" + with catch_warnings_for_item(session=session, task=task): + yield + + @staticmethod + @hookimpl(trylast=True) + def pytask_log_session_footer(session: Session) -> None: + """Log warnings at the end of a session.""" + if session.warnings: + grouped_warnings = defaultdict(list) + for warning in session.warnings: + location = ( + warning.id_ + if warning.id_ is not None + else "{}:{}".format(*warning.fs_location) + ) + grouped_warnings[warning.message].append(location) + sorted_gw = {k: sorted(v) for k, v in grouped_warnings.items()} + + renderable = MyRenderable(sorted_gw) + + panel = Panel(renderable, title="Warnings", style="warning") + console.print(panel) + + +@attr.s +class MyRenderable: + """A renderable for warnings.""" + + grouped_warnings = attr.ib(type=Dict[str, List[str]]) + + def __rich_console__( + self, console: Console, 
options: ConsoleOptions # noqa: U100 + ) -> RenderResult: + for message, locations in self.grouped_warnings.items(): + yield from locations + yield Padding.indent(message, 4) + yield ( + "[bold red]♥[/bold red] " + + "https://pytask-dev.rtfd.io/en/stable/how_to_guides/capture_warnings.html" + ) diff --git a/src/_pytask/warnings_utils.py b/src/_pytask/warnings_utils.py new file mode 100644 index 00000000..7fd265ac --- /dev/null +++ b/src/_pytask/warnings_utils.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from typing import Tuple + +import attr + + +@attr.s(kw_only=True) +class WarningReport: + message = attr.ib(type=str) + fs_location = attr.ib(type=Tuple[str, int]) + id_ = attr.ib(type=str) diff --git a/tests/test_mark.py b/tests/test_mark.py index 17082ddd..ee6c84d9 100644 --- a/tests/test_mark.py +++ b/tests/test_mark.py @@ -359,11 +359,11 @@ def task_second(depends_on): """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) - with pytest.warns(UserWarning, match="Unknown pytask.mark.wip"): - result = runner.invoke(cli, [tmp_path.as_posix(), "-m", "wip"]) + result = runner.invoke(cli, [tmp_path.as_posix(), "-m", "wip"]) assert result.exit_code == ExitCode.OK assert "2 Succeeded" in result.output + assert "Warnings" in result.output @pytest.mark.end_to_end @@ -402,12 +402,12 @@ def task_second(): """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) - with pytest.warns(UserWarning, match="Unknown pytask.mark.wip"): - result = runner.invoke(cli, [tmp_path.as_posix(), "-m", "wip"]) + result = runner.invoke(cli, [tmp_path.as_posix(), "-m", "wip"]) assert result.exit_code == ExitCode.OK assert "1 Succeeded" in result.output assert "1 Skipped" in result.output + assert "Warnings" in result.output @pytest.mark.end_to_end @@ -421,8 +421,8 @@ def task_example(): """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) - with pytest.warns(UserWarning, match="Unknown pytask.mark.wip"): - result = 
runner.invoke(cli, [tmp_path.as_posix(), "-m", "wip"]) + result = runner.invoke(cli, [tmp_path.as_posix(), "-m", "wip"]) assert result.exit_code == ExitCode.OK assert "1 Succeeded" in result.output + assert "Warnings" in result.output diff --git a/tests/test_warnings.py b/tests/test_warnings.py new file mode 100644 index 00000000..160693ee --- /dev/null +++ b/tests/test_warnings.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import textwrap + +import pytest +from pytask import cli +from pytask import ExitCode +from pytask import main + + +@pytest.mark.parametrize( + "disable_warnings", + [pytest.param(True, marks=pytest.mark.filterwarnings("ignore:warning!!!")), False], +) +def test_disable_warnings_cli(tmp_path, runner, disable_warnings): + source = """ + import warnings + + def task_example(): + warnings.warn("warning!!!") + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + flag = ["--disable-warnings"] if disable_warnings else [] + result = runner.invoke(cli, [tmp_path.as_posix()] + flag) + + assert result.exit_code == ExitCode.OK + assert ("Warnings" in result.output) is not disable_warnings + assert ("warning!!!" in result.output) is not disable_warnings + + +@pytest.mark.parametrize( + "disable_warnings", + [pytest.param(True, marks=pytest.mark.filterwarnings("ignore:warning!!!")), False], +) +def test_disable_warnings(tmp_path, disable_warnings): + source = """ + import warnings + + def task_example(): + warnings.warn("warning!!!") + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + session = main({"paths": tmp_path, "disable_warnings": disable_warnings}) + + assert session.exit_code == ExitCode.OK + if disable_warnings: + assert session.warnings == [] + else: + assert len(session.warnings) == 1 + assert "warning!!!" 
in session.warnings[0].message + + +@pytest.mark.parametrize("add_marker", [False, True]) +def test_disable_warnings_with_mark(tmp_path, runner, add_marker): + if add_marker: + decorator = "@pytask.mark.filterwarnings('ignore:warning!!!')" + else: + decorator = "" + + source = f""" + import pytask + import warnings + + {decorator} + def task_example(): + warnings.warn("warning!!!") + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert ("Warnings" in result.output) is not add_marker + assert ("warning!!!" in result.output) is not add_marker + + +@pytest.mark.parametrize( + "disable_warnings", + [pytest.param(True, marks=pytest.mark.filterwarnings("ignore:warning!!!")), False], +) +def test_disable_warnings_cli_collection(tmp_path, runner, disable_warnings): + source = """ + import warnings + + warnings.warn("warning!!!") + + def task_example(): + ... + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + flag = ["--disable-warnings"] if disable_warnings else [] + result = runner.invoke(cli, [tmp_path.as_posix()] + flag) + + assert result.exit_code == ExitCode.OK + assert ("Warnings" in result.output) is not disable_warnings + assert ("warning!!!" in result.output) is not disable_warnings + + +@pytest.mark.parametrize("add_config", [False, True]) +def test_disable_warnings_with_config(tmp_path, runner, add_config): + if add_config: + tmp_path.joinpath("pyproject.toml").write_text( + "[tool.pytask.ini_options]\nfilterwarnings = ['ignore:warning!!!']" + ) + + source = """ + import warnings + + def task_example(): + warnings.warn("warning!!!") + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert ("Warnings" in result.output) is not add_config + assert ("warning!!!" 
in result.output) is not add_config