From ba14937b543f8a208c4f74184e060aa0a195fcd0 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 12 Jun 2021 15:50:48 +0200 Subject: [PATCH 1/4] Add new command pytask dag to export dag. --- src/_pytask/build.py | 6 +- src/_pytask/cli.py | 2 + src/_pytask/graph.py | 204 +++++++++++++++++++++++++++++ src/_pytask/parameters.py | 2 +- src/pytask/__init__.py | 3 +- tests/test_graph.py | 84 ++++++++++++ tests/test_resolve_dependencies.py | 2 +- tox.ini | 9 +- 8 files changed, 304 insertions(+), 8 deletions(-) create mode 100644 src/_pytask/graph.py create mode 100644 tests/test_graph.py diff --git a/src/_pytask/build.py b/src/_pytask/build.py index 63a38c4d..b0bf8a6c 100644 --- a/src/_pytask/build.py +++ b/src/_pytask/build.py @@ -1,9 +1,9 @@ """Implement the build command.""" import sys -import traceback import click from _pytask.config import hookimpl +from _pytask.console import console from _pytask.enums import ExitCode from _pytask.exceptions import CollectionError from _pytask.exceptions import ConfigurationError @@ -50,7 +50,7 @@ def main(config_from_cli): session = Session.from_config(config) except (ConfigurationError, Exception): - traceback.print_exception(*sys.exc_info()) + console.print_exception() session = Session({}, None) session.exit_code = ExitCode.CONFIGURATION_FAILED @@ -71,7 +71,7 @@ def main(config_from_cli): session.exit_code = ExitCode.FAILED except Exception: - traceback.print_exception(*sys.exc_info()) + console.print_exception() session.exit_code = ExitCode.FAILED return session diff --git a/src/_pytask/cli.py b/src/_pytask/cli.py index 380e5134..c76eddb0 100644 --- a/src/_pytask/cli.py +++ b/src/_pytask/cli.py @@ -46,6 +46,7 @@ def pytask_add_hooks(pm): from _pytask import database from _pytask import debugging from _pytask import execute + from _pytask import graph from _pytask import logging from _pytask import mark from _pytask import parameters @@ -64,6 +65,7 @@ def pytask_add_hooks(pm): pm.register(database) pm.register(debugging) 
"""This file contains the command and code for drawing the DAG."""
import shutil
import sys
from pathlib import Path
from typing import Any
from typing import Dict

import click
import networkx as nx
from _pytask.config import hookimpl
from _pytask.console import console
from _pytask.dag import descending_tasks
from _pytask.enums import ColorCode
from _pytask.enums import ExitCode
from _pytask.exceptions import CollectionError
from _pytask.exceptions import ConfigurationError
from _pytask.exceptions import ResolvingDependenciesError
from _pytask.nodes import reduce_names_of_multiple_nodes
from _pytask.pluginmanager import get_plugin_manager
from _pytask.session import Session
from _pytask.shared import get_first_non_none_value


# Exit codes which ``_create_session`` assigns when no usable DAG was produced.
_FAILED_EXIT_CODES = (
    ExitCode.CONFIGURATION_FAILED,
    ExitCode.COLLECTION_FAILED,
    ExitCode.RESOLVING_DEPENDENCIES_FAILED,
    ExitCode.FAILED,
)


@hookimpl(tryfirst=True)
def pytask_extend_command_line_interface(cli: click.Group) -> None:
    """Extend the command line interface with the ``dag`` command."""
    cli.add_command(dag)


@hookimpl
def pytask_parse_config(config, config_from_cli, config_from_file):
    """Parse the configuration values used by ``pytask dag``.

    ``output_path`` falls back to ``dag.pdf`` in the current working directory and is
    converted to a :class:`pathlib.Path`. ``layout`` falls back to ``"dot"``.

    """
    config["output_path"] = get_first_non_none_value(
        config_from_cli,
        config_from_file,
        key="output_path",
        default=Path.cwd() / "dag.pdf",
        callback=lambda x: None if x is None else Path(x),
    )
    config["layout"] = get_first_non_none_value(
        config_from_cli,
        config_from_file,
        key="layout",
        default="dot",
    )


_HELP_TEXT_LAYOUT = (
    "The layout determines the structure of the graph. Here you find an overview of "
    "all available layouts: https://graphviz.org/#roadmap."
)


_HELP_TEXT_OUTPUT = (
    "The output path of the visualization. The format is inferred from the file "
    "extension."
)


@click.command()
@click.option("-l", "--layout", type=str, default=None, help=_HELP_TEXT_LAYOUT)
@click.option("-o", "--output-path", type=str, default=None, help=_HELP_TEXT_OUTPUT)
def dag(**config_from_cli):
    """Create a visualization of the project's DAG."""
    session = _create_session(config_from_cli)
    if _has_failed(session):
        # The failure was already handled in ``_create_session``. Without this guard
        # ``_refine_dag`` would crash on a session which has no DAG or configuration.
        sys.exit(session.exit_code)

    graph = _refine_dag(session)
    _write_graph(graph, session.config["output_path"], session.config["layout"])


def build_dag(config_from_cli: Dict[str, Any]) -> nx.DiGraph:
    """Build the DAG.

    This function is the programmatic interface to ``pytask dag`` and returns a
    preprocessed :class:`networkx.DiGraph`.

    Customization works best on the returned graph, for example, by setting node
    attributes. For drawing, convert it with :func:`networkx.nx_pydot.to_pydot` since
    this makes plotting easier than with matplotlib.

    Parameters
    ----------
    config_from_cli : Dict[str, Any]
        The configuration usually received from the CLI. For example, use ``{"paths":
        "example-directory/"}`` to collect tasks from a directory.

    Returns
    -------
    networkx.DiGraph
        A preprocessed graph which can be customized and exported.

    Raises
    ------
    RuntimeError
        If the configuration, the collection, or the resolution of dependencies
        failed and, thus, no DAG is available.

    """
    session = _create_session(config_from_cli)
    if _has_failed(session):
        raise RuntimeError(
            "The DAG could not be built. The session finished with exit code "
            f"{session.exit_code!r}."
        )
    return _refine_dag(session)


def _has_failed(session: Session) -> bool:
    """Return whether ``_create_session`` marked the session with a failure code."""
    # ``getattr`` is used because only the failure paths visibly assign ``exit_code``.
    return getattr(session, "exit_code", None) in _FAILED_EXIT_CODES


def _refine_dag(session: Session) -> nx.DiGraph:
    """Prepare the DAG of the session for plotting.

    The steps are applied in this order: shorten the node names, add a root node,
    remove all node attributes, set the node shapes, and quote the node names.

    """
    graph = _shorten_node_labels(session.dag, session.config["paths"])
    graph = _add_root_node(graph)
    graph = _clean_dag(graph)
    graph = _style_dag(graph)
    return _escape_node_names_with_colons(graph)


def _create_session(config_from_cli: Dict[str, Any]) -> Session:
    """Configure a session, collect the tasks and resolve the dependencies.

    Errors do not propagate. Instead, they are recorded in ``session.exit_code``. If
    the configuration fails, an empty session is returned.

    Parameters
    ----------
    config_from_cli : Dict[str, Any]
        The configuration usually received from the CLI.

    Returns
    -------
    Session
        The session. Callers need to inspect ``exit_code`` before using the DAG.

    """
    try:
        pm = get_plugin_manager()
        # Imported locally; presumably to avoid a circular import with the cli module.
        from _pytask import cli

        pm.register(cli)
        pm.hook.pytask_add_hooks(pm=pm)

        config = pm.hook.pytask_configure(pm=pm, config_from_cli=config_from_cli)

        session = Session.from_config(config)

    except (ConfigurationError, Exception):
        console.print_exception()
        session = Session({}, None)
        session.exit_code = ExitCode.CONFIGURATION_FAILED

    else:
        try:
            session.hook.pytask_log_session_header(session=session)
            session.hook.pytask_collect(session=session)
            session.hook.pytask_resolve_dependencies(session=session)

        except CollectionError:
            session.exit_code = ExitCode.COLLECTION_FAILED

        except ResolvingDependenciesError:
            session.exit_code = ExitCode.RESOLVING_DEPENDENCIES_FAILED

        except Exception:
            session.exit_code = ExitCode.FAILED
            console.print_exception()
            console.rule(style=ColorCode.FAILED)

    return session


def _shorten_node_labels(dag: nx.DiGraph, paths) -> nx.DiGraph:
    """Relabel the nodes with the names from ``reduce_names_of_multiple_nodes``.

    A relabeled copy is returned; the graph which is passed in is not modified.

    """
    node_names = dag.nodes
    short_names = reduce_names_of_multiple_nodes(node_names, dag, paths)
    old_to_new = dict(zip(node_names, short_names))
    return nx.relabel_nodes(dag, old_to_new)


def _add_root_node(dag: nx.DiGraph) -> nx.DiGraph:
    """Add a ``root`` node with edges to the selected task nodes.

    A node is selected if it carries a ``"task"`` attribute and ``descending_tasks``
    yields nothing for it. The graph is modified in place and returned.

    """
    # NOTE(review): the variable is called "without predecessor" but the selection
    # relies on ``descending_tasks`` - confirm that this helper is the intended one.
    tasks_without_predecessor = [
        name
        for name in dag.nodes
        if len(list(descending_tasks(name, dag))) == 0 and "task" in dag.nodes[name]
    ]
    if tasks_without_predecessor:
        dag.add_node("root")
        for name in tasks_without_predecessor:
            dag.add_edge("root", name)

    return dag


def _clean_dag(dag: nx.DiGraph) -> nx.DiGraph:
    """Remove all attributes from all nodes in place and return the graph."""
    for node in dag.nodes:
        dag.nodes[node].clear()
    return dag


def _style_dag(dag: nx.DiGraph) -> nx.DiGraph:
    """Set the ``shape`` attribute of all nodes.

    Nodes whose name contains ``"::task_"`` become hexagons, all others boxes.

    """
    shapes = {name: "hexagon" if "::task_" in name else "box" for name in dag.nodes}
    nx.set_node_attributes(dag, shapes, "shape")
    return dag


def _escape_node_names_with_colons(dag: nx.DiGraph) -> nx.DiGraph:
    """Escape node names with colons.

    pydot cannot handle colons in node names since it messes up some syntax. Escaping
    works by wrapping the string in double quotes. See this issue for more information:
    https://github.com/pydot/pydot/issues/224.

    """
    return nx.relabel_nodes(dag, {name: f'"{name}"' for name in dag.nodes})


def _write_graph(dag: nx.DiGraph, path: Path, layout: str) -> None:
    """Write the graph to ``path`` using the layout program ``layout``.

    The output format is the file extension of ``path`` without the leading dot.
    Missing parent directories are created.

    Raises
    ------
    ImportError
        If pydot is not installed.
    RuntimeError
        If the layout program cannot be found on the PATH.

    """
    try:
        import pydot  # noqa: F401
    except ImportError:
        raise ImportError(
            "To visualize the project's DAG you need to install pydot which is "
            "available with pip and conda."
        ) from None
    if shutil.which(layout) is None:
        # The f-prefix is required, otherwise the placeholder is shown literally.
        raise RuntimeError(
            f"The layout program '{layout}' could not be found on your PATH. Please, "
            "install graphviz. It is, for example, available with conda."
        )

    path.parent.mkdir(exist_ok=True, parents=True)
    graph = nx.nx_pydot.to_pydot(dag)
    graph.write(path, prog=layout, format=path.suffix[1:])
@pytest.mark.skipif(not _IS_PYDOT_INSTALLED, reason="pydot is required")
@pytest.mark.parametrize("layout", _PARAMETRIZED_LAYOUTS)
@pytest.mark.parametrize("format_", _TEST_FORMATS)
def test_create_graph_via_task(tmp_path, runner, format_, layout):
    """Draw the DAG from within a task which calls ``pytask.build_dag``."""
    # The generated task module builds the DAG of its own directory and writes it
    # with pydot next to itself.
    task_module = f"""
    import pytask
    from pathlib import Path
    import networkx as nx

    @pytask.mark.depends_on("input.txt")
    def task_example(): pass

    def task_create_graph():
        dag = pytask.build_dag({{"paths": Path(__file__).parent}})
        graph = nx.nx_pydot.to_pydot(dag)
        path = Path(__file__).parent.joinpath("dag.{format_}")
        graph.write(path, prog="{layout}", format=path.suffix[1:])
    """
    (tmp_path / "task_example.py").write_text(textwrap.dedent(task_module))
    (tmp_path / "input.txt").touch()

    outcome = runner.invoke(cli, [tmp_path.as_posix()])

    expected_output = tmp_path / f"dag.{format_}"
    assert outcome.exit_code == 0
    assert expected_output.exists()
-12,20 +12,25 @@ conda_channels = conda-forge nodefaults conda_deps = + # pytest pytest pytest-cov pytest-xdist - sphinx + # Package dependencies attrs click click-default-group networkx - pexpect pluggy pony >= 0.7.13 rich + # Optional and test dependencies + graphviz + pexpect + pydot + commands = pip install --no-deps -e . pytest {posargs} From 2d8fd30f508a1c152ec70370e9ab0e340aa3ecd6 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 12 Jun 2021 15:54:13 +0200 Subject: [PATCH 2/4] add to changes. --- docs/changes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changes.rst b/docs/changes.rst index 98d376e0..6a2edf09 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -23,6 +23,7 @@ all releases are available on `PyPI `_ and - :gh:`93` fixes the display of parametrized arguments in the console. - :gh:`94` adds ``--show-locals`` which allows to print local variables in tracebacks. - :gh:`96` implements a spinner to show the progress during the collection. +- :gh:`101` allows to visualize the project's DAG. 0.0.14 - 2021-03-23 From 80ebc26a0d07e8abcdb8949165a90c058b1bfdb1 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 12 Jun 2021 15:55:40 +0200 Subject: [PATCH 3/4] more to changes. --- docs/changes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/changes.rst b/docs/changes.rst index 6a2edf09..2aea3dff 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -23,6 +23,7 @@ all releases are available on `PyPI `_ and - :gh:`93` fixes the display of parametrized arguments in the console. - :gh:`94` adds ``--show-locals`` which allows to print local variables in tracebacks. - :gh:`96` implements a spinner to show the progress during the collection. +- :gh:`99` enables color support in WSL and fixes ``show_locals`` during collection. - :gh:`101` allows to visualize the project's DAG. 
From 6dee619dda8994f57f0aeb49b9eb4c680c1b11d7 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 12 Jun 2021 19:34:02 +0200 Subject: [PATCH 4/4] Add tutorial. --- docs/tutorials/how_to_visualize_the_dag.rst | 57 +++++++++++++++++++++ docs/tutorials/index.rst | 1 + 2 files changed, 58 insertions(+) create mode 100644 docs/tutorials/how_to_visualize_the_dag.rst diff --git a/docs/tutorials/how_to_visualize_the_dag.rst b/docs/tutorials/how_to_visualize_the_dag.rst new file mode 100644 index 00000000..89d29ea4 --- /dev/null +++ b/docs/tutorials/how_to_visualize_the_dag.rst @@ -0,0 +1,57 @@ +How to visualize the DAG +======================== + +pytask offers two interfaces to visualize the :term:`DAG` of your project. + + +Command line interface +---------------------- + +You can quickly create a visualization from the command line by entering + +.. code-block:: console + + $ pytask dag + +at the top of your project which will generate a ``dag.pdf``. + +There are ways to customize the visualization. + +1. You can change the layout of the graph by using the ``-l/--layout`` option. By + default, it is set to ``dot`` and produces a hierarchical layout. graphviz supports + other layouts as well which are listed `here <https://graphviz.org/#roadmap>`_. + +2. Using the ``-o/--output-path`` option, you can provide a file name for the graph. The + file extension changes the output format if it is supported by `pydot + <https://github.com/pydot/pydot>`_. + + +Programmatic Interface +---------------------- + +Since the possibilities for customization are limited via the command line interface, +there also exists a programmatic and interactive interface. + +Similar to :func:`pytask.main`, there exists :func:`pytask.build_dag` which returns the +DAG as a :class:`networkx.DiGraph`. + +.. code-block:: python + + @pytask.mark.produces(BLD / "dag.svg") + def task_draw_dag(produces): + dag = pytask.build_dag({"paths": SRC}) + +Customization works best on the :class:`networkx.DiGraph`. 
For example, here we set the +shape of all nodes to hexagons by adding the property to the node attributes. + +.. code-block:: python + + nx.set_node_attributes(dag, "hexagon", "shape") + +For drawing, you better switch to pydot or pygraphviz since the matplotlib backend +handles shapes with texts poorly. Here we use pydot and store the graph as an ``.svg``. + +.. code-block:: python + + graph = nx.nx_pydot.to_pydot(dag) + graph.write_svg(produces) diff --git a/docs/tutorials/index.rst b/docs/tutorials/index.rst index 65d53bdc..d2286618 100644 --- a/docs/tutorials/index.rst +++ b/docs/tutorials/index.rst @@ -23,3 +23,4 @@ project. Start here if you are a new user. how_to_capture how_to_invoke_pytask how_to_use_plugins + how_to_visualize_the_dag