diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index e040a05b..47e65d65 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -25,6 +25,7 @@ dependencies: - networkx - pluggy - pony >=0.7.15 + - pybaum - pexpect - rich - typing-extensions diff --git a/docs/source/changes.rst b/docs/source/changes.rst index d8ef315e..267b8aa1 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -9,6 +9,8 @@ all releases are available on `PyPI `_ and 0.2.0 - 2022-xx-xx ------------------ +- :pull:`211` allows for flexible dependencies and products which can be any pytree of + native Python objects as supported by pybaum. - :pull:`227` implements ``task.kwargs`` as a new way for a task to hold parametrized arguments. It also implements :class:`_pytask.models.CollectionMetadata` to carry parametrized arguments to the task class. diff --git a/docs/source/tutorials/how_to_define_dependencies_products.rst b/docs/source/tutorials/how_to_define_dependencies_products.rst index 7107e8b7..657db504 100644 --- a/docs/source/tutorials/how_to_define_dependencies_products.rst +++ b/docs/source/tutorials/how_to_define_dependencies_products.rst @@ -85,8 +85,30 @@ Multiple dependencies and products ---------------------------------- Most tasks have multiple dependencies or products. The easiest way to attach multiple -dependencies or products to a task is to pass a :class:`dict`, :class:`list` or another -iterator to the marker containing the paths. +dependencies or products to a task is to pass a :class:`dict` (highly recommended), +:class:`list` or another iterator to the marker containing the paths. + +To assign labels to dependencies or products, pass a dictionary. For example, + +.. code-block:: python + + @pytask.mark.produces({"first": BLD / "data_0.pkl", "second": BLD / "data_1.pkl"}) + def task_create_random_data(produces): + ... + +Then, use + +.. 
code-block:: pycon + + >>> produces["first"] + BLD / "data_0.pkl" + + >>> produces["second"] + BLD / "data_1.pkl" + +inside the task function. + +You can also use lists and other iterables. .. code-block:: python @@ -102,8 +124,9 @@ where keys are the positions in the list. >>> produces {0: BLD / "data_0.pkl", 1: BLD / "data_1.pkl"} -Why dictionaries and not lists? First, dictionaries with positions as keys behave very -similar to lists and conversion between both is easy. +Why does pytask recommend dictionaries and even convert lists to dictionaries? First, +dictionaries with positions as keys behave very similarly to lists and conversion +between both is easy. .. tip:: @@ -113,47 +136,100 @@ Secondly, dictionaries use keys instead of positions which is more verbose and descriptive and does not assume a fixed ordering. Both attributes are especially desirable in complex projects. -To assign labels to dependencies or products, pass a dictionary. For example, + +Multiple decorators +------------------- + +You can also attach multiple decorators to a function which will be merged into a single +dictionary. This might help you to group certain dependencies and apply them to multiple +tasks. .. code-block:: python - @pytask.mark.produces({"first": BLD / "data_0.pkl", "second": BLD / "data_1.pkl"}) - def task_create_random_data(produces): + common_dependencies = ["text_1.txt", "text_2.txt"] + + + @pytask.mark.depends_on(common_dependencies) + @pytask.mark.depends_on("text_3.txt") + def task_example(): ... -Then, use -.. code-block:: pycon +Nested dependencies and products +-------------------------------- - >>> produces["first"] - BLD / "data_0.pkl" +Dependencies and products are allowed to be nested containers consisting of tuples, +lists, and dictionaries. In situations where you want more structure and are otherwise +forced to flatten your inputs, this can be beneficial.
- >>> produces["first"] - BLD / "data_0.pkl" +Here is an example with a task which fits some model on data. It depends on a module +containing the code for the model which is not actively used, but ensures that the task +is rerun when the model is changed. And, it depends on data. - >>> produces["second"] - BLD / "data_1.pkl" -inside the task function. +.. code-block:: python + @pytask.mark.depends_on( + { + "model": [SRC / "models" / "model.py"], + "data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}, + } + ) + @pytask.mark.produces(BLD / "models" / "fitted_model.pkl") + def task_fit_model(): + ... -Multiple decorators -------------------- +It is also possible to merge nested containers. For example, you might want to reuse +the dependency on models for other tasks as well. -You can also attach multiple decorators to a function which will be merged into a single -dictionary. This might help you to group certain dependencies and apply them to multiple -tasks. +.. code-block:: python + + model_dependencies = pytask.mark.depends_on({"model": [SRC / "models" / "model.py"]}) + + + @model_dependencies + @pytask.mark.depends_on( + {"data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}} + ) + @pytask.mark.produces(BLD / "models" / "fitted_model.pkl") + def task_fit_model(): + ... + +In both cases, ``depends_on`` within the function will be .. code-block:: python - common_dependencies = ["text_1.txt", "text_2.txt"] + { + "model": {0: SRC / "models" / "model.py"}, + "data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}, + } +Tuples and lists are converted to dictionaries with integer keys. The innermost +decorator is evaluated first. - @pytask.mark.depends_on(common_dependencies) - @pytask.mark.depends_on("text_3.txt") - def task_example(): +..
code-block:: python + + @pytask.mark.depends_on([SRC / "models" / "model.py"]) + @pytask.mark.depends_on([SRC / "data" / "a.pkl", SRC / "data" / "b.pkl"]) + @pytask.mark.produces(BLD / "models" / "fitted_model.pkl") + def task_fit_model(): ... +would give + +.. code-block:: python + + {0: SRC / "data" / "a.pkl", 1: SRC / "data" / "b.pkl", 2: SRC / "models" / "model.py"} + +.. seealso:: + + The general concept behind nested objects like tuples, lists, and dictionaries is + called pytrees and is more extensively explained in the `documentation of pybaum + <https://github.com/OpenSourceEconomics/pybaum>`_ which serves pytask under the + hood. + References ---------- .. [1] The official documentation for :mod:`pathlib`. -.. [2] A guide for pathlib at `RealPython <https://realpython.com/python-pathlib/>`_. +.. [2] A guide for pathlib by `realpython <https://realpython.com/python-pathlib/>`_. diff --git a/environment.yml b/environment.yml index b7f5d234..5e78ecbf 100644 --- a/environment.yml +++ b/environment.yml @@ -22,6 +22,7 @@ dependencies: - networkx - pluggy - pony >=0.7.15 + - pybaum - rich - typing-extensions diff --git a/setup.cfg b/setup.cfg index f71db222..1b35e207 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,7 @@ install_requires = packaging pluggy pony>=0.7.15 + pybaum rich typing-extensions python_requires = >=3.7 diff --git a/src/_pytask/clean.py b/src/_pytask/clean.py index 0edcaf21..24580d28 100644 --- a/src/_pytask/clean.py +++ b/src/_pytask/clean.py @@ -9,6 +9,7 @@ from typing import Any from typing import Generator from typing import Iterable +from typing import List from typing import TYPE_CHECKING import attr @@ -26,6 +27,7 @@ from _pytask.session import Session from _pytask.shared import get_first_non_none_value from _pytask.traceback import render_exc_info +from pybaum.tree_util import tree_just_yield if TYPE_CHECKING: @@ -190,7 +192,7 @@ def _yield_paths_from_task(task: MetaTask) -> Generator[Path, None, None]: """Yield all paths attached to a task.""" yield task.path for attribute in ["depends_on", "produces"]: - for node in getattr(task, attribute).values(): +
for node in tree_just_yield(getattr(task, attribute)): if hasattr(node, "path") and isinstance(node.path, Path): yield node.path @@ -234,7 +236,7 @@ class _RecursivePathNode: """ path = attr.ib(type=Path) - sub_nodes = attr.ib(type="list[_RecursivePathNode]") + sub_nodes = attr.ib(type=List["_RecursivePathNode"]) is_dir = attr.ib(type=bool) is_file = attr.ib(type=bool) is_unknown = attr.ib(type=bool) diff --git a/src/_pytask/collect_command.py b/src/_pytask/collect_command.py index 0f4c2ce5..09257f66 100644 --- a/src/_pytask/collect_command.py +++ b/src/_pytask/collect_command.py @@ -25,6 +25,7 @@ from _pytask.path import relative_to from _pytask.pluginmanager import get_plugin_manager from _pytask.session import Session +from pybaum.tree_util import tree_just_flatten from rich.text import Text from rich.tree import Tree @@ -125,13 +126,8 @@ def _find_common_ancestor_of_all_nodes( for task in tasks: all_paths.append(task.path) if show_nodes: - all_paths.extend( - [ - node.path - for attr in ("depends_on", "produces") - for node in getattr(task, attr).values() - ] - ) + all_paths.extend(map(lambda x: x.path, tree_just_flatten(task.depends_on))) + all_paths.extend(map(lambda x: x.path, tree_just_flatten(task.produces))) common_ancestor = find_common_ancestor(*all_paths, *paths) @@ -201,7 +197,9 @@ def _print_collected_tasks( ) if show_nodes: - for node in sorted(task.depends_on.values(), key=lambda x: x.path): + for node in sorted( + tree_just_flatten(task.depends_on), key=lambda x: x.path + ): reduced_node_name = relative_to(node.path, common_ancestor) url_style = create_url_style_for_path(node.path, editor_url_scheme) task_branch.add( @@ -213,7 +211,9 @@ def _print_collected_tasks( ) ) - for node in sorted(task.produces.values(), key=lambda x: x.path): + for node in sorted( + tree_just_flatten(task.produces), key=lambda x: x.path + ): reduced_node_name = relative_to(node.path, common_ancestor) url_style = create_url_style_for_path(node.path, editor_url_scheme) 
task_branch.add( diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index 06312816..bbf9abcd 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -29,6 +29,7 @@ from _pytask.traceback import format_exception_without_traceback from _pytask.traceback import remove_traceback_from_exc_info from _pytask.traceback import render_exc_info +from pybaum.tree_util import tree_map from rich.text import Text @@ -157,13 +158,7 @@ def pytask_execute_task(task: MetaTask) -> bool: for arg_name in ("depends_on", "produces"): if arg_name in func_arg_names: attribute = getattr(task, arg_name) - kwargs[arg_name] = ( - attribute[0].value - if len(attribute) == 1 - and 0 in attribute - and not task.keep_dict[arg_name] - else {name: node.value for name, node in attribute.items()} - ) + kwargs[arg_name] = tree_map(lambda x: x.value, attribute) task.execute(**kwargs) return True diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 33793933..5f63b98c 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -3,6 +3,7 @@ import functools import itertools +import uuid from abc import ABCMeta from abc import abstractmethod from pathlib import Path @@ -21,6 +22,7 @@ from _pytask.exceptions import NodeNotFoundError from _pytask.mark_utils import remove_markers_from_func from _pytask.session import Session +from pybaum import tree_map if TYPE_CHECKING: @@ -119,8 +121,6 @@ class PythonFunctionTask(MetaTask): """Optional[List[Mark]]: A list of markers attached to the task function.""" kwargs = attr.ib(factory=dict, type=Dict[str, Any]) """Dict[str, Any]: A dictionary with keyword arguments supplied to the task.""" - keep_dict = attr.ib(factory=dict, type=Dict[str, bool]) - """Dict[str, bool]: Should dictionaries for single nodes be preserved?""" _report_sections = attr.ib(factory=list, type=List[Tuple[str, str, str]]) """List[Tuple[str, str, str]]: Reports with entries for when, what, and content.""" attributes = attr.ib(factory=dict, type=Dict[Any, Any]) @@ 
-135,17 +135,13 @@ def from_path_name_function_session( cls, path: Path, name: str, function: Callable[..., Any], session: Session ) -> PythonFunctionTask: """Create a task from a path, name, function, and session.""" - keep_dictionary = {} - objects = _extract_nodes_from_function_markers(function, depends_on) - nodes, keep_dict_de = _convert_objects_to_node_dictionary(objects, "depends_on") - keep_dictionary["depends_on"] = keep_dict_de - dependencies = _collect_nodes(session, path, name, nodes) + nodes = _convert_objects_to_node_dictionary(objects, "depends_on") + dependencies = tree_map(lambda x: _collect_node(session, path, name, x), nodes) objects = _extract_nodes_from_function_markers(function, produces) - nodes, keep_dict_prod = _convert_objects_to_node_dictionary(objects, "produces") - keep_dictionary["produces"] = keep_dict_prod - products = _collect_nodes(session, path, name, nodes) + nodes = _convert_objects_to_node_dictionary(objects, "produces") + products = tree_map(lambda x: _collect_node(session, path, name, x), nodes) markers = [ marker @@ -167,7 +163,6 @@ def from_path_name_function_session( produces=products, markers=markers, kwargs=kwargs, - keep_dict=keep_dictionary, ) def execute(self, **kwargs: Any) -> None: @@ -216,8 +211,8 @@ def state(self) -> str | None: return str(self.path.stat().st_mtime) -def _collect_nodes( - session: Session, path: Path, name: str, nodes: dict[str, str | Path] +def _collect_node( + session: Session, path: Path, name: str, node: str | Path ) -> dict[str, MetaNode]: """Collect nodes for a task. @@ -243,21 +238,16 @@ def _collect_nodes( If the node could not collected. 
""" - collected_nodes = {} - - for node_name, node in nodes.items(): - collected_node = session.hook.pytask_collect_node( - session=session, path=path, node=node + collected_node = session.hook.pytask_collect_node( + session=session, path=path, node=node + ) + if collected_node is None: + raise NodeNotCollectedError( + f"{node!r} cannot be parsed as a dependency or product for task " + f"{name!r} in {path!r}." ) - if collected_node is None: - raise NodeNotCollectedError( - f"{node!r} cannot be parsed as a dependency or product for task " - f"{name!r} in {path!r}." - ) - else: - collected_nodes[node_name] = collected_node - return collected_nodes + return collected_node def _extract_nodes_from_function_markers( @@ -277,80 +267,50 @@ def _extract_nodes_from_function_markers( yield parsed -def _convert_objects_to_node_dictionary( - objects: Any, when: str -) -> tuple[dict[Any, Any], bool]: +def _convert_objects_to_node_dictionary(objects: Any, when: str) -> dict[Any, Any]: """Convert objects to node dictionary.""" - list_of_tuples, keep_dict = _convert_objects_to_list_of_tuples(objects, when) - _check_that_names_are_not_used_multiple_times(list_of_tuples, when) - nodes = _convert_nodes_to_dictionary(list_of_tuples) - return nodes, keep_dict + list_of_dicts = [convert_to_dict(x) for x in objects] + _check_that_names_are_not_used_multiple_times(list_of_dicts, when) + nodes = merge_dictionaries(list_of_dicts) + return nodes -def _convert_objects_to_list_of_tuples( - objects: Any | tuple[Any, Any] | list[Any] | list[tuple[Any, Any]], when: str -) -> tuple[list[tuple[Any, ...]], bool]: - """Convert objects to list of tuples. 
+@attr.s(frozen=True) +class _Placeholder: + scalar = attr.ib(type=bool, default=False) + id_ = attr.ib(factory=uuid.uuid4, type=uuid.UUID) - Examples - -------- - _convert_objects_to_list_of_tuples([{0: 0}, [4, (3, 2)], ((1, 4),)) - [(0, 0), (4,), (3, 2), (1, 4)], False - """ - keep_dict = False - - out = [] - for obj in objects: - if isinstance(obj, dict): - obj = obj.items() - - if isinstance(obj, Iterable) and not isinstance(obj, str): - keep_dict = True - for x in obj: - if isinstance(x, Iterable) and not isinstance(x, str): - tuple_x = tuple(x) - if len(tuple_x) in [1, 2]: - out.append(tuple_x) - else: - name = "Dependencies" if when == "depends_on" else "Products" - raise ValueError( - f"{name} in pytask.mark.{when} can be given as a value or " - "a name and a value which is 1 or 2 elements. The " - f"following node has {len(tuple_x)} elements: {tuple_x}." - ) - else: - out.append((x,)) +def convert_to_dict(x: Any, first_level: bool = True) -> Any | dict[Any, Any]: + if isinstance(x, dict): + return {k: convert_to_dict(v, False) for k, v in x.items()} + elif isinstance(x, Iterable) and not isinstance(x, str): + if first_level: + return { + _Placeholder(): convert_to_dict(element, False) + for i, element in enumerate(x) + } else: - out.append((obj,)) - - if len(out) > 1: - keep_dict = False - - return out, keep_dict + return {i: convert_to_dict(element, False) for i, element in enumerate(x)} + elif first_level: + return {_Placeholder(scalar=True): x} + else: + return x def _check_that_names_are_not_used_multiple_times( - list_of_tuples: list[tuple[Any, ...]], when: str + list_of_dicts: list[dict[Any, Any]], when: str ) -> None: """Check that names of nodes are not assigned multiple times. Tuples in the list have either one or two elements. The first element in the two element tuples is the name and cannot occur twice. - Examples - -------- - >>> _check_that_names_are_not_used_multiple_times( - ... [("a",), ("a", 1)], "depends_on" - ... 
) - >>> _check_that_names_are_not_used_multiple_times( - ... [("a", 0), ("a", 1)], "produces" - ... ) - Traceback (most recent call last): - ValueError: '@pytask.mark.produces' has nodes with the same name: {'a'} - """ - names = [x[0] for x in list_of_tuples if len(x) == 2] + names_with_provisional_keys = list( + itertools.chain.from_iterable(dict_.keys() for dict_ in list_of_dicts) + ) + names = [x for x in names_with_provisional_keys if not isinstance(x, _Placeholder)] duplicated = find_duplicates(names) if duplicated: @@ -359,37 +319,63 @@ def _check_that_names_are_not_used_multiple_times( ) -def _convert_nodes_to_dictionary( - list_of_tuples: list[tuple[Any, ...]] -) -> dict[Any, Any]: - """Convert nodes to dictionaries. +def union_of_dictionaries(dicts: list[dict[Any, Any]]) -> dict[Any, Any]: + """Merge multiple dictionaries in one. Examples -------- - >>> _convert_nodes_to_dictionary([(0,), (1,)]) - {0: 0, 1: 1} - >>> _convert_nodes_to_dictionary([(1, 0), (1,)]) - {1: 0, 0: 1} + >>> a, b = {"a": 0}, {"b": 1} + >>> union_of_dictionaries([a, b]) + {'a': 0, 'b': 1} + + >>> a, b = {'a': 0}, {'a': 1} + >>> union_of_dictionaries([a, b]) + {'a': 1} """ - nodes = {} - counter = itertools.count() - names = [x[0] for x in list_of_tuples if len(x) == 2] + return dict(itertools.chain.from_iterable(dict_.items() for dict_ in dicts)) - for tuple_ in list_of_tuples: - if len(tuple_) == 2: - node_name, node = tuple_ - nodes[node_name] = node - else: - while True: - node_name = next(counter) - if node_name not in names: - break +def merge_dictionaries(list_of_dicts: list[dict[Any, Any]]) -> dict[Any, Any]: + """Merge multiple dictionaries. - nodes[node_name] = tuple_[0] + The function does not perform a deep merge. It simply merges the dictionary based on + the first level keys which are either unique names or placeholders. During the merge + placeholders will be replaced by an incrementing integer. 
- return nodes + Examples + -------- + >>> a, b = {"a": 0}, {"b": 1} + >>> merge_dictionaries([a, b]) + {'a': 0, 'b': 1} + + >>> a, b = {_Placeholder(): 0}, {_Placeholder(): 1} + >>> merge_dictionaries([a, b]) + {0: 0, 1: 1} + + """ + merged_dict = union_of_dictionaries(list_of_dicts) + + if len(merged_dict) == 1 and isinstance(list(merged_dict)[0], _Placeholder): + placeholder, value = list(merged_dict.items())[0] + if placeholder.scalar: + out = value + else: + out = {0: value} + else: + counter = itertools.count() + out = {} + for k, v in merged_dict.items(): + if isinstance(k, _Placeholder): + while True: + possible_key = next(counter) + if possible_key not in merged_dict and possible_key not in out: + out[possible_key] = v + break + else: + out[k] = v + + return out def create_task_name(path: Path, base_name: str) -> str: diff --git a/src/_pytask/resolve_dependencies.py b/src/_pytask/resolve_dependencies.py index 67f71777..246a118b 100644 --- a/src/_pytask/resolve_dependencies.py +++ b/src/_pytask/resolve_dependencies.py @@ -29,6 +29,7 @@ from _pytask.shared import reduce_node_name from _pytask.traceback import render_exc_info from pony import orm +from pybaum import tree_map from rich.text import Text from rich.tree import Tree @@ -76,13 +77,11 @@ def pytask_resolve_dependencies_create_dag(tasks: list[MetaTask]) -> nx.DiGraph: for task in tasks: dag.add_node(task.name, task=task) - for dependency in task.depends_on.values(): - dag.add_node(dependency.name, node=dependency) - dag.add_edge(dependency.name, task.name) + tree_map(lambda x: dag.add_node(x.name, node=x), task.depends_on) + tree_map(lambda x: dag.add_edge(x.name, task.name), task.depends_on) - for product in task.produces.values(): - dag.add_node(product.name, node=product) - dag.add_edge(task.name, product.name) + tree_map(lambda x: dag.add_node(x.name, node=x), task.produces) + tree_map(lambda x: dag.add_edge(task.name, x.name), task.produces) _check_if_dag_has_cycles(dag) diff --git 
a/tests/test_collect.py b/tests/test_collect.py index 6d4790fa..3d4eb7d4 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -141,12 +141,23 @@ def test_collect_files_w_custom_file_name_pattern( Path.cwd() / "text.txt", id="test with absolute string path", ), + pytest.param( + Session({"check_casing_of_paths": False}, None), + Path(), + 1, + does_not_raise(), + None, + id="test cannot collect node", + ), ], ) def test_pytask_collect_node(session, path, node, expectation, expected): with expectation: result = pytask_collect_node(session, path, node) - assert str(result.path) == str(expected) + if result is None: + assert result is expected + else: + assert str(result.path) == str(expected) @pytest.mark.unit diff --git a/tests/test_execute.py b/tests/test_execute.py index 3798020a..87368bff 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -133,7 +133,6 @@ def test_assert_multiple_dependencies_are_merged_to_dict(tmp_path, runner): import pytask from pathlib import Path - @pytask.mark.depends_on([(5, "in_5.txt"), (6, "in_6.txt")]) @pytask.mark.depends_on({3: "in_3.txt", 4: "in_4.txt"}) @pytask.mark.depends_on(["in_1.txt", "in_2.txt"]) @pytask.mark.depends_on("in_0.txt") @@ -141,13 +140,13 @@ def test_assert_multiple_dependencies_are_merged_to_dict(tmp_path, runner): def task_example(depends_on, produces): expected = { i: Path(__file__).parent.joinpath(f"in_{i}.txt").resolve() - for i in range(7) + for i in range(5) } assert depends_on == expected produces.touch() """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) - for name in [f"in_{i}.txt" for i in range(7)]: + for name in [f"in_{i}.txt" for i in range(5)]: tmp_path.joinpath(name).touch() result = runner.invoke(cli, [tmp_path.as_posix()]) @@ -162,14 +161,13 @@ def test_assert_multiple_products_are_merged_to_dict(tmp_path, runner): from pathlib import Path @pytask.mark.depends_on("in.txt") - @pytask.mark.produces([(5, "out_5.txt"), (6, "out_6.txt")]) 
@pytask.mark.produces({3: "out_3.txt", 4: "out_4.txt"}) @pytask.mark.produces(["out_1.txt", "out_2.txt"]) @pytask.mark.produces("out_0.txt") def task_example(depends_on, produces): expected = { i: Path(__file__).parent.joinpath(f"out_{i}.txt").resolve() - for i in range(7) + for i in range(5) } assert produces == expected for product in produces.values(): diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 16956e90..554e2501 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from contextlib import ExitStack as does_not_raise # noqa: N813 from pathlib import Path @@ -7,12 +8,13 @@ import pytask import pytest from _pytask.nodes import _check_that_names_are_not_used_multiple_times -from _pytask.nodes import _convert_nodes_to_dictionary -from _pytask.nodes import _convert_objects_to_list_of_tuples from _pytask.nodes import _convert_objects_to_node_dictionary from _pytask.nodes import _extract_nodes_from_function_markers +from _pytask.nodes import _Placeholder +from _pytask.nodes import convert_to_dict from _pytask.nodes import create_task_name from _pytask.nodes import depends_on +from _pytask.nodes import merge_dictionaries from _pytask.nodes import produces from _pytask.shared import reduce_node_name from pytask import FilePathNode @@ -110,54 +112,6 @@ def state(self): assert isinstance(task, MetaNode) -@pytest.mark.unit -@pytest.mark.parametrize( - ("x", "when", "expectation", "expected_lot", "expected_kd"), - [ - (["string"], "depends_on", does_not_raise(), [("string",)], False), - (("string",), "depends_on", does_not_raise(), [("string",)], False), - (range(2), "depends_on", does_not_raise(), [(0,), (1,)], False), - ( - [{"a": 0, "b": 1}], - "depends_on", - does_not_raise(), - [("a", 0), ("b", 1)], - False, - ), - ( - ["a", ("b", "c"), {"d": 1, "e": 1}], - "depends_on", - does_not_raise(), - [("a",), ("b",), ("c",), ("d", 1), ("e", 1)], - False, - ), - ([["string"]], 
"depends_on", does_not_raise(), [("string",)], True), - ([{0: "string"}], "depends_on", does_not_raise(), [(0, "string")], True), - ( - [((0, 1, 2),)], - "depends_on", - pytest.raises(ValueError, match="Dependencies in pytask.mark.depends_on"), - None, - None, - ), - ( - [((0, 1, 2),)], - "produces", - pytest.raises(ValueError, match="Products in pytask.mark.produces"), - None, - None, - ), - ], -) -def test_convert_objects_to_list_of_tuples( - x, when, expectation, expected_lot, expected_kd -): - with expectation: - list_of_tuples, keep_dict = _convert_objects_to_list_of_tuples(x, when) - assert list_of_tuples == expected_lot - assert keep_dict is expected_kd - - ERROR = "'@pytask.mark.depends_on' has nodes with the same name:" @@ -165,13 +119,12 @@ def test_convert_objects_to_list_of_tuples( @pytest.mark.parametrize( ("x", "expectation"), [ - ([(0, "a"), (0, "b")], pytest.raises(ValueError, match=ERROR)), - ([("a", 0), ("a", 1)], pytest.raises(ValueError, match=ERROR)), - ([("a", 0), ("b",), ("a", 1)], pytest.raises(ValueError, match=ERROR)), - ([("a", 0), ("b", 0), ("a", 1)], pytest.raises(ValueError, match=ERROR)), - ([("a",), ("a")], does_not_raise()), - ([("a", 0), ("a",)], does_not_raise()), - ([("a", 0), ("b", 1)], does_not_raise()), + ([{0: "a"}, {0: "b"}], pytest.raises(ValueError, match=ERROR)), + ([{"a": 0}, {"a": 1}], pytest.raises(ValueError, match=ERROR)), + ([{"a": 0}, {"b": 0}, {"a": 1}], pytest.raises(ValueError, match=ERROR)), + ([{0: "a"}, {1: "a"}], does_not_raise()), + ([{"a": 0}, {0: "a"}], does_not_raise()), + ([{"a": 0}, {"b": 1}], does_not_raise()), ], ) def test_check_that_names_are_not_used_multiple_times(x, expectation): @@ -179,19 +132,6 @@ def test_check_that_names_are_not_used_multiple_times(x, expectation): _check_that_names_are_not_used_multiple_times(x, "depends_on") -@pytest.mark.unit -@pytest.mark.parametrize( - ("x", "expected"), - [ - ([("a",), ("b",)], {0: "a", 1: "b"}), - ([(1, "a"), ("b",), (0, "c")], {1: "a", 2: "b", 0: 
"c"}), - ], -) -def test_convert_nodes_to_dictionary(x, expected): - result = _convert_nodes_to_dictionary(x) - assert result == expected - - @pytest.mark.unit @pytest.mark.parametrize( "path, name, expected", @@ -249,25 +189,72 @@ def test_reduce_node_name(node, paths, expectation, expected): @pytest.mark.integration @pytest.mark.parametrize("when", ["depends_on", "produces"]) @pytest.mark.parametrize( - "objects, expectation, expected_dict, expected_kd", + "objects, expectation, expected", [ - ([0, 1], does_not_raise, {0: 0, 1: 1}, False), - ([{0: 0}, {1: 1}], does_not_raise, {0: 0, 1: 1}, False), - ([{0: 0}], does_not_raise, {0: 0}, True), - ([[0]], does_not_raise, {0: 0}, True), - ([((0, 0),), ((0, 1),)], ValueError, None, None), - ([{0: 0}, {0: 1}], ValueError, None, None), + ([0, 1], does_not_raise, {0: 0, 1: 1}), + ([{0: 0}, {1: 1}], does_not_raise, {0: 0, 1: 1}), + ([{0: 0}], does_not_raise, {0: 0}), + ([[0]], does_not_raise, {0: 0}), + ( + [((0, 0),), ((0, 1),)], + does_not_raise, + {0: {0: 0, 1: 0}, 1: {0: 0, 1: 1}}, + ), + ([{0: {0: {0: 0}}}, [2]], does_not_raise, {0: {0: {0: 0}}, 1: 2}), + ([{0: 0}, {0: 1}], ValueError, None), ], ) -def test_convert_objects_to_node_dictionary( - objects, when, expectation, expected_dict, expected_kd -): +def test_convert_objects_to_node_dictionary(objects, when, expectation, expected): expectation = ( pytest.raises(expectation, match=f"'@pytask.mark.{when}' has nodes") if expectation == ValueError else expectation() ) with expectation: - node_dict, keep_dict = _convert_objects_to_node_dictionary(objects, when) - assert node_dict == expected_dict - assert keep_dict is expected_kd + nodes = _convert_objects_to_node_dictionary(objects, when) + assert nodes == expected + + +def _convert_placeholders_to_tuples(x): + counter = itertools.count() + return { + (next(counter), k.scalar) + if isinstance(k, _Placeholder) + else k: _convert_placeholders_to_tuples(v) + if isinstance(v, dict) + else v + for k, v in x.items() + } + + 
+@pytest.mark.unit +@pytest.mark.parametrize( + "x, first_level, expected", + [ + (1, True, {(0, True): 1}), + ({1: 0}, False, {1: 0}), + ({1: [2, 3]}, False, {1: {0: 2, 1: 3}}), + ([2, 3], True, {(0, False): 2, (1, False): 3}), + ([2, 3], False, {0: 2, 1: 3}), + ], +) +def test_convert_to_dict(x, first_level, expected): + """We convert placeholders to a tuple consisting of the key and the scalar bool.""" + result = convert_to_dict(x, first_level) + modified_result = _convert_placeholders_to_tuples(result) + assert modified_result == expected + + +@pytest.mark.unit +@pytest.mark.parametrize( + "list_of_dicts, expected", + [ + ([{1: 0}, {0: 1}], {1: 0, 0: 1}), + ([{_Placeholder(): 1}, {0: 0}], {1: 1, 0: 0}), + ([{_Placeholder(scalar=True): 1}], 1), + ([{_Placeholder(scalar=False): 1}], {0: 1}), + ], +) +def test_merge_dictionaries(list_of_dicts, expected): + result = merge_dictionaries(list_of_dicts) + assert result == expected diff --git a/tests/test_profile.py b/tests/test_profile.py index 531d3085..7b4f242d 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -109,6 +109,7 @@ def task_example(): time.sleep(2) assert tmp_path.joinpath(f"profile.{export}").exists() +@pytest.mark.unit @pytest.mark.parametrize( "bytes_, units, expected", [ diff --git a/tests/test_pybaum.py b/tests/test_pybaum.py new file mode 100644 index 00000000..052e7591 --- /dev/null +++ b/tests/test_pybaum.py @@ -0,0 +1,77 @@ +"""This module contains tests for pybaum and flexible dependencies and products.""" +from __future__ import annotations + +import textwrap + +import pytest +from _pytask.outcomes import ExitCode +from pybaum import tree_map +from pytask import cli +from pytask import main + + +@pytest.mark.end_to_end +@pytest.mark.parametrize( + "decorator_name, exit_code", [("depends_on", 4), ("produces", 1)] +) +def test_task_with_complex_product_did_not_produce_node( + tmp_path, decorator_name, exit_code +): + source = f""" + import pytask + + + complex = [ + "out.txt", + 
("tuple_out.txt",), + ["list_out.txt"], + {{"a": "dict_out.txt", "b": {{"c": "dict_out_2.txt"}}}}, + ] + + + @pytask.mark.{decorator_name}(complex) + def task_example(): + pass + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + session = main({"paths": tmp_path}) + + assert session.exit_code == exit_code + + products = tree_map(lambda x: x.value, getattr(session.tasks[0], decorator_name)) + expected = { + 0: tmp_path / "out.txt", + 1: {0: tmp_path / "tuple_out.txt"}, + 2: {0: tmp_path / "list_out.txt"}, + 3: {"a": tmp_path / "dict_out.txt", "b": {"c": tmp_path / "dict_out_2.txt"}}, + } + assert products == expected + + +@pytest.mark.end_to_end +def test_profile_with_pybaum(tmp_path, runner): + source = """ + import time + import pytask + from pybaum.tree_util import tree_just_flatten + + @pytask.mark.produces([{"out_1": "out_1.txt"}, {"out_2": "out_2.txt"}]) + def task_example(produces): + time.sleep(2) + for p in tree_just_flatten(produces): + p.write_text("There are nine billion bicycles in Beijing.") + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK + + result = runner.invoke(cli, ["profile", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "Collected 1 task." in result.output + assert "Duration (in s)" in result.output + assert "0." 
in result.output + assert "Size of Products" in result.output + assert "86 bytes" in result.output diff --git a/tests/test_task.py b/tests/test_task.py index 145ea69b..316a1c22 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -8,7 +8,7 @@ from pytask import main -@pytest.mark.unit +@pytest.mark.end_to_end @pytest.mark.parametrize("func_name", ["task_example", "func"]) @pytest.mark.parametrize("task_name", ["the_only_task", None]) def test_task_with_task_decorator(tmp_path, func_name, task_name): @@ -37,7 +37,7 @@ def {func_name}(produces): ) -@pytest.mark.unit +@pytest.mark.end_to_end @pytest.mark.parametrize("func_name", ["task_example", "func"]) @pytest.mark.parametrize("task_name", ["the_only_task", None]) def test_task_with_task_decorator_with_parametrize(tmp_path, func_name, task_name): diff --git a/tox.ini b/tox.ini index b8946225..c7ad568a 100644 --- a/tox.ini +++ b/tox.ini @@ -24,6 +24,7 @@ conda_deps = networkx>=2.4 pluggy pony >= 0.7.15 + pybaum rich # Optional and test dependencies