From fd7f4360dfd2c6198788d0aaee007e802f35af4f Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 31 Jan 2022 00:47:55 +0100 Subject: [PATCH 01/20] Add pybaum. --- docs/rtd_environment.yml | 1 + environment.yml | 1 + setup.cfg | 1 + tests/test_pybaum.py | 41 ++++++++++++++++++++++++++++++++++++++++ tox.ini | 1 + 5 files changed, 45 insertions(+) create mode 100644 tests/test_pybaum.py diff --git a/docs/rtd_environment.yml b/docs/rtd_environment.yml index 76e3cf83..3699890a 100644 --- a/docs/rtd_environment.yml +++ b/docs/rtd_environment.yml @@ -25,6 +25,7 @@ dependencies: - networkx - pluggy - pony >=0.7.15 + - pybaum - pexpect - rich diff --git a/environment.yml b/environment.yml index 57f54b7f..c2f6fa44 100644 --- a/environment.yml +++ b/environment.yml @@ -22,6 +22,7 @@ dependencies: - networkx - pluggy - pony >=0.7.15 + - pybaum - rich # Misc diff --git a/setup.cfg b/setup.cfg index 68da70ff..7508eaf2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,7 @@ install_requires = packaging pluggy pony>=0.7.15 + pybaum rich python_requires = >=3.7 include_package_data = True diff --git a/tests/test_pybaum.py b/tests/test_pybaum.py new file mode 100644 index 00000000..e2944b5e --- /dev/null +++ b/tests/test_pybaum.py @@ -0,0 +1,41 @@ +"""This module contains tests for pybaum and flexible dependencies and products.""" +from __future__ import annotations + +import textwrap + +import pytest +from pytask import main + + +@pytest.mark.end_to_end +def test_task_with_complex_product_did_not_produce_node(tmp_path): + source = """ + import pytask + + + complex_product = [ + "out.txt", + ("tuple_out.txt",), + ["list_out.txt"], + {"a": "dict_out.txt", "b": {"c": "dict_out_2.txt"}}, + ] + + + @pytask.mark.produces(complex_product) + def task_example(): + pass + """ + tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) + + session = main({"paths": tmp_path}) + + assert session.exit_code == 1 + + products = session.tasks[0].produces + expected = { + 0: 
tmp_path / "out.txt", + 1: {0: tmp_path / "tuple_out.txt"}, + 2: {0: tmp_path / "list_out.txt"}, + 3: {"a": tmp_path / "dict_out.txt", "b": {"c": tmp_path / "dict_out_2.txt"}}, + } + assert products == expected diff --git a/tox.ini b/tox.ini index b8946225..c7ad568a 100644 --- a/tox.ini +++ b/tox.ini @@ -24,6 +24,7 @@ conda_deps = networkx>=2.4 pluggy pony >= 0.7.15 + pybaum rich # Optional and test dependencies From fe6a89c4b6e918bda419cbc24af0fa1ad4001551 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 31 Jan 2022 01:15:31 +0100 Subject: [PATCH 02/20] Move to conversion to dicts, disable tuples as name, value combinations. --- src/_pytask/nodes.py | 131 +++++++++++++++----------------------- tests/test_execute.py | 8 +-- tests/test_nodes.py | 143 ++++++++++++++++++++++-------------------- 3 files changed, 129 insertions(+), 153 deletions(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index edbddfec..de43cc84 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -4,6 +4,7 @@ import functools import inspect import itertools +import uuid from abc import ABCMeta from abc import abstractmethod from pathlib import Path @@ -289,76 +290,48 @@ def _convert_objects_to_node_dictionary( objects: Any, when: str ) -> tuple[dict[Any, Any], bool]: """Convert objects to node dictionary.""" - list_of_tuples, keep_dict = _convert_objects_to_list_of_tuples(objects, when) - _check_that_names_are_not_used_multiple_times(list_of_tuples, when) - nodes = _convert_nodes_to_dictionary(list_of_tuples) + list_of_dicts = [convert_to_dict(x) for x in objects] + _check_that_names_are_not_used_multiple_times(list_of_dicts, when) + nodes, keep_dict = merge_dictionaries(list_of_dicts) return nodes, keep_dict -def _convert_objects_to_list_of_tuples( - objects: Any | tuple[Any, Any] | list[Any] | list[tuple[Any, Any]], when: str -) -> tuple[list[tuple[Any, ...]], bool]: - """Convert objects to list of tuples. 
+@attr.s(frozen=True) +class _Placeholder: + scalar = attr.ib(type=bool, default=False) + id_ = attr.ib(factory=uuid.uuid4, type=uuid.UUID) - Examples - -------- - _convert_objects_to_list_of_tuples([{0: 0}, [4, (3, 2)], ((1, 4),)) - [(0, 0), (4,), (3, 2), (1, 4)], False - """ - keep_dict = False - - out = [] - for obj in objects: - if isinstance(obj, dict): - obj = obj.items() - - if isinstance(obj, Iterable) and not isinstance(obj, str): - keep_dict = True - for x in obj: - if isinstance(x, Iterable) and not isinstance(x, str): - tuple_x = tuple(x) - if len(tuple_x) in [1, 2]: - out.append(tuple_x) - else: - name = "Dependencies" if when == "depends_on" else "Products" - raise ValueError( - f"{name} in pytask.mark.{when} can be given as a value or " - "a name and a value which is 1 or 2 elements. The " - f"following node has {len(tuple_x)} elements: {tuple_x}." - ) - else: - out.append((x,)) +def convert_to_dict(x: Any, first_level: bool = True) -> Any | dict[Any, Any]: + if isinstance(x, dict): + return {k: convert_to_dict(v, False) for k, v in x.items()} + elif isinstance(x, Iterable) and not isinstance(x, str): + if first_level: + return { + _Placeholder(): convert_to_dict(element, False) + for i, element in enumerate(x) + } else: - out.append((obj,)) - - if len(out) > 1: - keep_dict = False - - return out, keep_dict + return {i: convert_to_dict(element, False) for i, element in enumerate(x)} + elif first_level: + return {_Placeholder(scalar=True): x} + else: + return x def _check_that_names_are_not_used_multiple_times( - list_of_tuples: list[tuple[Any, ...]], when: str + list_of_dicts: list[dict[Any, Any]], when: str ) -> None: """Check that names of nodes are not assigned multiple times. Tuples in the list have either one or two elements. The first element in the two element tuples is the name and cannot occur twice. - Examples - -------- - >>> _check_that_names_are_not_used_multiple_times( - ... [("a",), ("a", 1)], "depends_on" - ... 
) - >>> _check_that_names_are_not_used_multiple_times( - ... [("a", 0), ("a", 1)], "produces" - ... ) - Traceback (most recent call last): - ValueError: '@pytask.mark.produces' has nodes with the same name: {'a'} - """ - names = [x[0] for x in list_of_tuples if len(x) == 2] + names_with_provisional_keys = list( + itertools.chain.from_iterable(dict_.keys() for dict_ in list_of_dicts) + ) + names = [x for x in names_with_provisional_keys if not isinstance(x, _Placeholder)] duplicated = find_duplicates(names) if duplicated: @@ -367,37 +340,37 @@ def _check_that_names_are_not_used_multiple_times( ) -def _convert_nodes_to_dictionary( - list_of_tuples: list[tuple[Any, ...]] -) -> dict[Any, Any]: - """Convert nodes to dictionaries. +def union(dicts: list[dict[Any, Any]]) -> dict[Any, Any]: + return dict(itertools.chain.from_iterable(dict_.items() for dict_ in dicts)) - Examples - -------- - >>> _convert_nodes_to_dictionary([(0,), (1,)]) - {0: 0, 1: 1} - >>> _convert_nodes_to_dictionary([(1, 0), (1,)]) - {1: 0, 0: 1} - """ - nodes = {} - counter = itertools.count() - names = [x[0] for x in list_of_tuples if len(x) == 2] +def merge_dictionaries( + list_of_dicts: list[dict[Any, Any]] +) -> tuple[dict[Any, Any], bool]: + """Merge multiple dictionaries. - for tuple_ in list_of_tuples: - if len(tuple_) == 2: - node_name, node = tuple_ - nodes[node_name] = node + The function does not perform a deep merge. It checks whether first level keys are + unique and fills provisional keys with a running value. - else: - while True: - node_name = next(counter) - if node_name not in names: - break + Tuples in the list have either one or two elements. The first element in the two + element tuples is the name and cannot occur twice. 
- nodes[node_name] = tuple_[0] + """ + merged_dict = union(list_of_dicts) + + if len(merged_dict) == 1 and isinstance(list(merged_dict)[0], _Placeholder): + placeholder, value = list(merged_dict.items())[0] + out = {0: value} + keep_dict = not placeholder.scalar + else: + counter = itertools.count() + out = { + next(counter) if isinstance(k, _Placeholder) else k: v + for k, v in merged_dict.items() + } + keep_dict = True - return nodes + return out, keep_dict def create_task_name(path: Path, base_name: str) -> str: diff --git a/tests/test_execute.py b/tests/test_execute.py index ed025d39..6816bd18 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -133,7 +133,6 @@ def test_assert_multiple_dependencies_are_merged_to_dict(tmp_path, runner): import pytask from pathlib import Path - @pytask.mark.depends_on([(5, "in_5.txt"), (6, "in_6.txt")]) @pytask.mark.depends_on({3: "in_3.txt", 4: "in_4.txt"}) @pytask.mark.depends_on(["in_1.txt", "in_2.txt"]) @pytask.mark.depends_on("in_0.txt") @@ -141,13 +140,13 @@ def test_assert_multiple_dependencies_are_merged_to_dict(tmp_path, runner): def task_example(depends_on, produces): expected = { i: Path(__file__).parent.joinpath(f"in_{i}.txt").resolve() - for i in range(7) + for i in range(5) } assert depends_on == expected produces.touch() """ tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source)) - for name in [f"in_{i}.txt" for i in range(7)]: + for name in [f"in_{i}.txt" for i in range(5)]: tmp_path.joinpath(name).touch() result = runner.invoke(cli, [tmp_path.as_posix()]) @@ -162,14 +161,13 @@ def test_assert_multiple_products_are_merged_to_dict(tmp_path, runner): from pathlib import Path @pytask.mark.depends_on("in.txt") - @pytask.mark.produces([(5, "out_5.txt"), (6, "out_6.txt")]) @pytask.mark.produces({3: "out_3.txt", 4: "out_4.txt"}) @pytask.mark.produces(["out_1.txt", "out_2.txt"]) @pytask.mark.produces("out_0.txt") def task_example(depends_on, produces): expected = { i: 
Path(__file__).parent.joinpath(f"out_{i}.txt").resolve() - for i in range(7) + for i in range(5) } assert produces == expected for product in produces.values(): diff --git a/tests/test_nodes.py b/tests/test_nodes.py index ac603697..efcec026 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -7,8 +7,6 @@ import pytask import pytest from _pytask.nodes import _check_that_names_are_not_used_multiple_times -from _pytask.nodes import _convert_nodes_to_dictionary -from _pytask.nodes import _convert_objects_to_list_of_tuples from _pytask.nodes import _convert_objects_to_node_dictionary from _pytask.nodes import _extract_nodes_from_function_markers from _pytask.nodes import create_task_name @@ -19,6 +17,9 @@ from _pytask.nodes import produces from _pytask.shared import reduce_node_name +# from _pytask.nodes import _convert_nodes_to_dictionary +# from _pytask.nodes import _convert_objects_to_list_of_tuples + @pytest.mark.unit @pytest.mark.parametrize("decorator", [pytask.mark.depends_on, pytask.mark.produces]) @@ -110,52 +111,52 @@ def state(self): assert isinstance(task, MetaNode) -@pytest.mark.unit -@pytest.mark.parametrize( - ("x", "when", "expectation", "expected_lot", "expected_kd"), - [ - (["string"], "depends_on", does_not_raise(), [("string",)], False), - (("string",), "depends_on", does_not_raise(), [("string",)], False), - (range(2), "depends_on", does_not_raise(), [(0,), (1,)], False), - ( - [{"a": 0, "b": 1}], - "depends_on", - does_not_raise(), - [("a", 0), ("b", 1)], - False, - ), - ( - ["a", ("b", "c"), {"d": 1, "e": 1}], - "depends_on", - does_not_raise(), - [("a",), ("b",), ("c",), ("d", 1), ("e", 1)], - False, - ), - ([["string"]], "depends_on", does_not_raise(), [("string",)], True), - ([{0: "string"}], "depends_on", does_not_raise(), [(0, "string")], True), - ( - [((0, 1, 2),)], - "depends_on", - pytest.raises(ValueError, match="Dependencies in pytask.mark.depends_on"), - None, - None, - ), - ( - [((0, 1, 2),)], - "produces", - 
pytest.raises(ValueError, match="Products in pytask.mark.produces"), - None, - None, - ), - ], -) -def test_convert_objects_to_list_of_tuples( - x, when, expectation, expected_lot, expected_kd -): - with expectation: - list_of_tuples, keep_dict = _convert_objects_to_list_of_tuples(x, when) - assert list_of_tuples == expected_lot - assert keep_dict is expected_kd +# @pytest.mark.unit +# @pytest.mark.parametrize( +# ("x", "when", "expectation", "expected_lot", "expected_kd"), +# [ +# (["string"], "depends_on", does_not_raise(), [("string",)], False), +# (("string",), "depends_on", does_not_raise(), [("string",)], False), +# (range(2), "depends_on", does_not_raise(), [(0,), (1,)], False), +# ( +# [{"a": 0, "b": 1}], +# "depends_on", +# does_not_raise(), +# [("a", 0), ("b", 1)], +# False, +# ), +# ( +# ["a", ("b", "c"), {"d": 1, "e": 1}], +# "depends_on", +# does_not_raise(), +# [("a",), ("b",), ("c",), ("d", 1), ("e", 1)], +# False, +# ), +# ([["string"]], "depends_on", does_not_raise(), [("string",)], True), +# ([{0: "string"}], "depends_on", does_not_raise(), [(0, "string")], True), +# ( +# [((0, 1, 2),)], +# "depends_on", +# pytest.raises(ValueError, match="Dependencies in pytask.mark.depends_on"), +# None, +# None, +# ), +# ( +# [((0, 1, 2),)], +# "produces", +# pytest.raises(ValueError, match="Products in pytask.mark.produces"), +# None, +# None, +# ), +# ], +# ) +# def test_convert_objects_to_list_of_tuples( +# x, when, expectation, expected_lot, expected_kd +# ): +# with expectation: +# list_of_tuples, keep_dict = _convert_objects_to_list_of_tuples(x, when) +# assert list_of_tuples == expected_lot +# assert keep_dict is expected_kd ERROR = "'@pytask.mark.depends_on' has nodes with the same name:" @@ -165,13 +166,12 @@ def test_convert_objects_to_list_of_tuples( @pytest.mark.parametrize( ("x", "expectation"), [ - ([(0, "a"), (0, "b")], pytest.raises(ValueError, match=ERROR)), - ([("a", 0), ("a", 1)], pytest.raises(ValueError, match=ERROR)), - ([("a", 0), ("b",), 
("a", 1)], pytest.raises(ValueError, match=ERROR)), - ([("a", 0), ("b", 0), ("a", 1)], pytest.raises(ValueError, match=ERROR)), - ([("a",), ("a")], does_not_raise()), - ([("a", 0), ("a",)], does_not_raise()), - ([("a", 0), ("b", 1)], does_not_raise()), + ([{0: "a"}, {0: "b"}], pytest.raises(ValueError, match=ERROR)), + ([{"a": 0}, {"a": 1}], pytest.raises(ValueError, match=ERROR)), + ([{"a": 0}, {"b": 0}, {"a": 1}], pytest.raises(ValueError, match=ERROR)), + ([{0: "a"}, {1: "a"}], does_not_raise()), + ([{"a": 0}, {0: "a"}], does_not_raise()), + ([{"a": 0}, {"b": 1}], does_not_raise()), ], ) def test_check_that_names_are_not_used_multiple_times(x, expectation): @@ -179,17 +179,17 @@ def test_check_that_names_are_not_used_multiple_times(x, expectation): _check_that_names_are_not_used_multiple_times(x, "depends_on") -@pytest.mark.unit -@pytest.mark.parametrize( - ("x", "expected"), - [ - ([("a",), ("b",)], {0: "a", 1: "b"}), - ([(1, "a"), ("b",), (0, "c")], {1: "a", 2: "b", 0: "c"}), - ], -) -def test_convert_nodes_to_dictionary(x, expected): - result = _convert_nodes_to_dictionary(x) - assert result == expected +# @pytest.mark.unit +# @pytest.mark.parametrize( +# ("x", "expected"), +# [ +# ([("a",), ("b",)], {0: "a", 1: "b"}), +# ([(1, "a"), ("b",), (0, "c")], {1: "a", 2: "b", 0: "c"}), +# ], +# ) +# def test_convert_nodes_to_dictionary(x, expected): +# result = _convert_nodes_to_dictionary(x) +# assert result == expected @pytest.mark.unit @@ -251,11 +251,16 @@ def test_reduce_node_name(node, paths, expectation, expected): @pytest.mark.parametrize( "objects, expectation, expected_dict, expected_kd", [ - ([0, 1], does_not_raise, {0: 0, 1: 1}, False), - ([{0: 0}, {1: 1}], does_not_raise, {0: 0, 1: 1}, False), + ([0, 1], does_not_raise, {0: 0, 1: 1}, True), + ([{0: 0}, {1: 1}], does_not_raise, {0: 0, 1: 1}, True), ([{0: 0}], does_not_raise, {0: 0}, True), ([[0]], does_not_raise, {0: 0}, True), - ([((0, 0),), ((0, 1),)], ValueError, None, None), + ( + [((0, 0),), ((0, 
1),)], + does_not_raise, + {0: {0: 0, 1: 0}, 1: {0: 0, 1: 1}}, + True, + ), ([{0: 0}, {0: 1}], ValueError, None, None), ], ) From 104e2dc8b6ca85a20a2012b408ac133e0e5bfa64 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 31 Jan 2022 18:45:12 +0100 Subject: [PATCH 03/20] Use tree_map. --- src/_pytask/nodes.py | 32 +++++++-------- src/_pytask/resolve_dependencies.py | 11 +++-- tests/test_nodes.py | 64 ----------------------------- tests/test_pybaum.py | 3 +- 4 files changed, 21 insertions(+), 89 deletions(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index de43cc84..09866fa7 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -23,6 +23,7 @@ from _pytask.exceptions import NodeNotFoundError from _pytask.mark_utils import get_marks_from_obj from _pytask.session import Session +from pybaum import tree_map if TYPE_CHECKING: @@ -138,12 +139,12 @@ def from_path_name_function_session( objects = _extract_nodes_from_function_markers(function, depends_on) nodes, keep_dict_de = _convert_objects_to_node_dictionary(objects, "depends_on") keep_dictionary["depends_on"] = keep_dict_de - dependencies = _collect_nodes(session, path, name, nodes) + dependencies = tree_map(lambda x: _collect_node(session, path, name, x), nodes) objects = _extract_nodes_from_function_markers(function, produces) nodes, keep_dict_prod = _convert_objects_to_node_dictionary(objects, "produces") keep_dictionary["produces"] = keep_dict_prod - products = _collect_nodes(session, path, name, nodes) + products = tree_map(lambda x: _collect_node(session, path, name, x), nodes) markers = [ marker @@ -183,7 +184,7 @@ def _get_kwargs_from_task_for_function(self) -> dict[str, Any]: if len(attribute) == 1 and 0 in attribute and not self.keep_dict[arg_name] - else {name: node.value for name, node in attribute.items()} + else tree_map(lambda x: x.value, attribute) ) return kwargs @@ -226,8 +227,8 @@ def state(self) -> str | None: return str(self.path.stat().st_mtime) -def _collect_nodes( - 
session: Session, path: Path, name: str, nodes: dict[str, str | Path] +def _collect_node( + session: Session, path: Path, name: str, node: str | Path ) -> dict[str, MetaNode]: """Collect nodes for a task. @@ -253,21 +254,16 @@ def _collect_nodes( If the node could not collected. """ - collected_nodes = {} - - for node_name, node in nodes.items(): - collected_node = session.hook.pytask_collect_node( - session=session, path=path, node=node + collected_node = session.hook.pytask_collect_node( + session=session, path=path, node=node + ) + if collected_node is None: + raise NodeNotCollectedError( + f"{node!r} cannot be parsed as a dependency or product for task " + f"{name!r} in {path!r}." ) - if collected_node is None: - raise NodeNotCollectedError( - f"{node!r} cannot be parsed as a dependency or product for task " - f"{name!r} in {path!r}." - ) - else: - collected_nodes[node_name] = collected_node - return collected_nodes + return collected_node def _extract_nodes_from_function_markers( diff --git a/src/_pytask/resolve_dependencies.py b/src/_pytask/resolve_dependencies.py index 33824c32..655b05b8 100644 --- a/src/_pytask/resolve_dependencies.py +++ b/src/_pytask/resolve_dependencies.py @@ -28,6 +28,7 @@ from _pytask.shared import reduce_node_name from _pytask.traceback import render_exc_info from pony import orm +from pybaum import tree_map from rich.text import Text from rich.tree import Tree @@ -75,13 +76,11 @@ def pytask_resolve_dependencies_create_dag(tasks: list[MetaTask]) -> nx.DiGraph: for task in tasks: dag.add_node(task.name, task=task) - for dependency in task.depends_on.values(): - dag.add_node(dependency.name, node=dependency) - dag.add_edge(dependency.name, task.name) + tree_map(lambda x: dag.add_node(x.name, node=x), task.depends_on) + tree_map(lambda x: dag.add_edge(x.name, task.name), task.depends_on) - for product in task.produces.values(): - dag.add_node(product.name, node=product) - dag.add_edge(task.name, product.name) + tree_map(lambda x: 
dag.add_node(x.name, node=x), task.produces) + tree_map(lambda x: dag.add_edge(task.name, x.name), task.produces) _check_if_dag_has_cycles(dag) diff --git a/tests/test_nodes.py b/tests/test_nodes.py index efcec026..e11834ca 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -17,9 +17,6 @@ from _pytask.nodes import produces from _pytask.shared import reduce_node_name -# from _pytask.nodes import _convert_nodes_to_dictionary -# from _pytask.nodes import _convert_objects_to_list_of_tuples - @pytest.mark.unit @pytest.mark.parametrize("decorator", [pytask.mark.depends_on, pytask.mark.produces]) @@ -111,54 +108,6 @@ def state(self): assert isinstance(task, MetaNode) -# @pytest.mark.unit -# @pytest.mark.parametrize( -# ("x", "when", "expectation", "expected_lot", "expected_kd"), -# [ -# (["string"], "depends_on", does_not_raise(), [("string",)], False), -# (("string",), "depends_on", does_not_raise(), [("string",)], False), -# (range(2), "depends_on", does_not_raise(), [(0,), (1,)], False), -# ( -# [{"a": 0, "b": 1}], -# "depends_on", -# does_not_raise(), -# [("a", 0), ("b", 1)], -# False, -# ), -# ( -# ["a", ("b", "c"), {"d": 1, "e": 1}], -# "depends_on", -# does_not_raise(), -# [("a",), ("b",), ("c",), ("d", 1), ("e", 1)], -# False, -# ), -# ([["string"]], "depends_on", does_not_raise(), [("string",)], True), -# ([{0: "string"}], "depends_on", does_not_raise(), [(0, "string")], True), -# ( -# [((0, 1, 2),)], -# "depends_on", -# pytest.raises(ValueError, match="Dependencies in pytask.mark.depends_on"), -# None, -# None, -# ), -# ( -# [((0, 1, 2),)], -# "produces", -# pytest.raises(ValueError, match="Products in pytask.mark.produces"), -# None, -# None, -# ), -# ], -# ) -# def test_convert_objects_to_list_of_tuples( -# x, when, expectation, expected_lot, expected_kd -# ): -# with expectation: -# list_of_tuples, keep_dict = _convert_objects_to_list_of_tuples(x, when) -# assert list_of_tuples == expected_lot -# assert keep_dict is expected_kd - - ERROR = 
"'@pytask.mark.depends_on' has nodes with the same name:" @@ -179,19 +128,6 @@ def test_check_that_names_are_not_used_multiple_times(x, expectation): _check_that_names_are_not_used_multiple_times(x, "depends_on") -# @pytest.mark.unit -# @pytest.mark.parametrize( -# ("x", "expected"), -# [ -# ([("a",), ("b",)], {0: "a", 1: "b"}), -# ([(1, "a"), ("b",), (0, "c")], {1: "a", 2: "b", 0: "c"}), -# ], -# ) -# def test_convert_nodes_to_dictionary(x, expected): -# result = _convert_nodes_to_dictionary(x) -# assert result == expected - - @pytest.mark.unit @pytest.mark.parametrize( "path, name, expected", diff --git a/tests/test_pybaum.py b/tests/test_pybaum.py index e2944b5e..d3d0c47f 100644 --- a/tests/test_pybaum.py +++ b/tests/test_pybaum.py @@ -4,6 +4,7 @@ import textwrap import pytest +from pybaum import tree_map from pytask import main @@ -31,7 +32,7 @@ def task_example(): assert session.exit_code == 1 - products = session.tasks[0].produces + products = tree_map(lambda x: x.value, session.tasks[0].produces) expected = { 0: tmp_path / "out.txt", 1: {0: tmp_path / "tuple_out.txt"}, From c4383ad0d6295542f5c60909a00f381aedce9d73 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 31 Jan 2022 19:01:05 +0100 Subject: [PATCH 04/20] Extend test to depends_on. 
--- tests/test_pybaum.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/test_pybaum.py b/tests/test_pybaum.py index d3d0c47f..e90e1f77 100644 --- a/tests/test_pybaum.py +++ b/tests/test_pybaum.py @@ -9,20 +9,25 @@ @pytest.mark.end_to_end -def test_task_with_complex_product_did_not_produce_node(tmp_path): - source = """ +@pytest.mark.parametrize( + "decorator_name, exit_code", [("depends_on", 4), ("produces", 1)] +) +def test_task_with_complex_product_did_not_produce_node( + tmp_path, decorator_name, exit_code +): + source = f""" import pytask - complex_product = [ + complex = [ "out.txt", ("tuple_out.txt",), ["list_out.txt"], - {"a": "dict_out.txt", "b": {"c": "dict_out_2.txt"}}, + {{"a": "dict_out.txt", "b": {{"c": "dict_out_2.txt"}}}}, ] - @pytask.mark.produces(complex_product) + @pytask.mark.{decorator_name}(complex) def task_example(): pass """ @@ -30,9 +35,9 @@ def task_example(): session = main({"paths": tmp_path}) - assert session.exit_code == 1 + assert session.exit_code == exit_code - products = tree_map(lambda x: x.value, session.tasks[0].produces) + products = tree_map(lambda x: x.value, getattr(session.tasks[0], decorator_name)) expected = { 0: tmp_path / "out.txt", 1: {0: tmp_path / "tuple_out.txt"}, From a758e5ab4af4551351fe4176f53ce1aa9517fc68 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 31 Jan 2022 23:31:13 +0100 Subject: [PATCH 05/20] Fix functio. 
--- src/_pytask/nodes.py | 14 ++++++++++---- tests/test_nodes.py | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 09866fa7..b6c8f1dd 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -360,10 +360,16 @@ def merge_dictionaries( keep_dict = not placeholder.scalar else: counter = itertools.count() - out = { - next(counter) if isinstance(k, _Placeholder) else k: v - for k, v in merged_dict.items() - } + out = {} + for k, v in merged_dict.items(): + if isinstance(k, _Placeholder): + while True: + possible_key = next(counter) + if possible_key not in merged_dict and possible_key not in out: + out[possible_key] = v + break + else: + out[k] = v keep_dict = True return out, keep_dict diff --git a/tests/test_nodes.py b/tests/test_nodes.py index e11834ca..5001c3a9 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -197,6 +197,7 @@ def test_reduce_node_name(node, paths, expectation, expected): {0: {0: 0, 1: 0}, 1: {0: 0, 1: 1}}, True, ), + ([{0: {0: {0: 0}}}, [2]], does_not_raise, {0: {0: {0: 0}}, 1: 2}, True), ([{0: 0}, {0: 1}], ValueError, None, None), ], ) From 29bc3eabe11977bde405795ce163766ab8e4e73f Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 31 Jan 2022 23:39:07 +0100 Subject: [PATCH 06/20] Fix docstrings and add examples. --- src/_pytask/nodes.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index b6c8f1dd..ef88701b 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -336,7 +336,20 @@ def _check_that_names_are_not_used_multiple_times( ) -def union(dicts: list[dict[Any, Any]]) -> dict[Any, Any]: +def union_of_dictionaries(dicts: list[dict[Any, Any]]) -> dict[Any, Any]: + """Merge multiple dictionaries in one. 
+ + Examples + -------- + >>> a, b = {"a": 0}, {"b": 1} + >>> union_of_dictionaries([a, b]) + {'a': 0, 'b': 1} + + >>> a, b = {'a': 0}, {'a': 1} + >>> union_of_dictionaries([a, b]) + {'a': 1} + + """ return dict(itertools.chain.from_iterable(dict_.items() for dict_ in dicts)) @@ -345,14 +358,22 @@ def merge_dictionaries( ) -> tuple[dict[Any, Any], bool]: """Merge multiple dictionaries. - The function does not perform a deep merge. It checks whether first level keys are - unique and fills provisional keys with a running value. + The function does not perform a deep merge. It simply merges the dictionary based on + the first level keys which are either unique names or placeholders. During the merge + placeholders will be replaced by an incrementing integer. - Tuples in the list have either one or two elements. The first element in the two - element tuples is the name and cannot occur twice. + Examples + -------- + >>> a, b = {"a": 0}, {"b": 1} + >>> merge_dictionaries([a, b]) + ({'a': 0, 'b': 1}, True) + + >>> a, b = {_Placeholder(): 0}, {_Placeholder(): 1} + >>> merge_dictionaries([a, b]) + ({0: 0, 1: 1}, True) """ - merged_dict = union(list_of_dicts) + merged_dict = union_of_dictionaries(list_of_dicts) if len(merged_dict) == 1 and isinstance(list(merged_dict)[0], _Placeholder): placeholder, value = list(merged_dict.items())[0] From 9f640d1f45b7ddadea42bd5fd5e89eb4de48bba9 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 1 Feb 2022 01:25:02 +0100 Subject: [PATCH 07/20] add a section to the docs. 
--- .../how_to_define_dependencies_products.rst | 109 ++++++++++++++---- 1 file changed, 84 insertions(+), 25 deletions(-) diff --git a/docs/source/tutorials/how_to_define_dependencies_products.rst b/docs/source/tutorials/how_to_define_dependencies_products.rst index 7107e8b7..8cdcd9ea 100644 --- a/docs/source/tutorials/how_to_define_dependencies_products.rst +++ b/docs/source/tutorials/how_to_define_dependencies_products.rst @@ -85,53 +85,56 @@ Multiple dependencies and products ---------------------------------- Most tasks have multiple dependencies or products. The easiest way to attach multiple -dependencies or products to a task is to pass a :class:`dict`, :class:`list` or another -iterator to the marker containing the paths. +dependencies or products to a task is to pass a :class:`dict` (highly recommended), +:class:`list` or another iterator to the marker containing the paths. + +To assign labels to dependencies or products, pass a dictionary. For example, .. code-block:: python - @pytask.mark.produces([BLD / "data_0.pkl", BLD / "data_1.pkl"]) + @pytask.mark.produces({"first": BLD / "data_0.pkl", "second": BLD / "data_1.pkl"}) def task_create_random_data(produces): ... -Inside the function, the arguments ``depends_on`` or ``produces`` become a dictionary -where keys are the positions in the list. +Then, use .. code-block:: pycon - >>> produces - {0: BLD / "data_0.pkl", 1: BLD / "data_1.pkl"} - -Why dictionaries and not lists? First, dictionaries with positions as keys behave very -similar to lists and conversion between both is easy. - -.. tip:: + >>> produces["first"] + BLD / "data_0.pkl" - Use ``list(produces.values())`` to convert a dictionary to a list. + >>> produces["second"] + BLD / "data_1.pkl" -Secondly, dictionaries use keys instead of positions which is more verbose and -descriptive and does not assume a fixed ordering. Both attributes are especially -desirable in complex projects. +inside the task function. 
-To assign labels to dependencies or products, pass a dictionary. For example, +You can also use lists and other iterables. .. code-block:: python - @pytask.mark.produces({"first": BLD / "data_0.pkl", "second": BLD / "data_1.pkl"}) + @pytask.mark.produces([BLD / "data_0.pkl", BLD / "data_1.pkl"]) def task_create_random_data(produces): ... -Then, use +Inside the function, the arguments ``depends_on`` or ``produces`` become a dictionary +where keys are the positions in the list. .. code-block:: pycon - >>> produces["first"] - BLD / "data_0.pkl" + >>> produces + {0: BLD / "data_0.pkl", 1: BLD / "data_1.pkl"} - >>> produces["second"] - BLD / "data_1.pkl" +Why does pytask recommend dictionaries and even converts lists to dictionaries? First, +dictionaries with positions as keys behave very similar to lists and conversion between +both is easy. -inside the task function. +.. tip:: + + Use ``list(produces.values())`` to convert a dictionary to a list. + +Secondly, dictionaries use keys instead of positions which is more verbose and +descriptive and does not assume a fixed ordering. Both attributes are especially +desirable in complex projects. Multiple decorators @@ -152,8 +155,64 @@ tasks. ... +Nested dependencies and products +-------------------------------- + +Dependencies and products are also allowed to be nested containers consisting of tuples, +lists, and dictionaries. In situations where you want more structure and are otherwise +forced to flatten your inputs, this can be beneficial. + +Here is an example with a task which fits some model on data. It depends on some model +files which are not actively used, but make sure the task is rerun when the model is +changed. And, it depends on data which is actively used. + +.. 
code-block:: python + + @pytask.mark.depends_on( + { + "model": [SRC / "models" / "model.py"], + "data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}, + } + ) + @pytask.mark.produces(BLD / "models" / "fitted_model.pkl") + def task_fit_model(): + ... + +It is also possible to merge nested containers. For example, you might want to reuse +the dependency on models for other tasks as well. + +.. code-block:: python + + model_dependencies = pytask.mark.depends_on({"model": [SRC / "models" / "model.py"]}) + + + @model_dependencies + @pytask.mark.depends_on( + {"data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}} + ) + @pytask.mark.produces(BLD / "models" / "fitted_model.pkl") + def task_fit_model(): + ... + +In both cases, ``depends_on`` within the function will be + +.. code-block:: python + + { + "model": [SRC / "models" / "model.py"], + "data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}, + } + +.. seealso:: + + The general concept behind nested objects like tuples, lists, and dictionaries is + called pytrees and is more extensively explained in the `documentation of pybaum + `_ which serves pytask under the + hood. + + References ---------- .. [1] The official documentation for :mod:`pathlib`. -.. [2] A guide for pathlib at `RealPython `_. +.. [2] A guide for pathlib by `realpython `_. From 9bb85fe34153e10d8be572ec5af7a073ee640eb9 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 1 Feb 2022 02:23:51 +0100 Subject: [PATCH 08/20] more tests. 
--- tests/test_nodes.py | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 5001c3a9..a465f0ba 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from contextlib import ExitStack as does_not_raise # noqa: N813 from pathlib import Path @@ -9,9 +10,12 @@ from _pytask.nodes import _check_that_names_are_not_used_multiple_times from _pytask.nodes import _convert_objects_to_node_dictionary from _pytask.nodes import _extract_nodes_from_function_markers +from _pytask.nodes import _Placeholder +from _pytask.nodes import convert_to_dict from _pytask.nodes import create_task_name from _pytask.nodes import depends_on from _pytask.nodes import FilePathNode +from _pytask.nodes import merge_dictionaries from _pytask.nodes import MetaNode from _pytask.nodes import MetaTask from _pytask.nodes import produces @@ -213,3 +217,49 @@ def test_convert_objects_to_node_dictionary( node_dict, keep_dict = _convert_objects_to_node_dictionary(objects, when) assert node_dict == expected_dict assert keep_dict is expected_kd + + +def _convert_placeholders_to_tuples(x): + counter = itertools.count() + return { + (next(counter), k.scalar) + if isinstance(k, _Placeholder) + else k: _convert_placeholders_to_tuples(v) + if isinstance(v, dict) + else v + for k, v in x.items() + } + + +@pytest.mark.unit +@pytest.mark.parametrize( + "x, first_level, expected", + [ + (1, True, {(0, True): 1}), + ({1: 0}, False, {1: 0}), + ({1: [2, 3]}, False, {1: {0: 2, 1: 3}}), + ([2, 3], True, {(0, False): 2, (1, False): 3}), + ([2, 3], False, {0: 2, 1: 3}), + ], +) +def test_convert_to_dict(x, first_level, expected): + """We convert placeholders to a tuple consisting of the key and the scalar bool.""" + result = convert_to_dict(x, first_level) + modified_result = _convert_placeholders_to_tuples(result) + assert modified_result == expected + + 
+
+@pytest.mark.unit
+@pytest.mark.parametrize(
+    "list_of_dicts, expected_dict, expected_keep",
+    [
+        ([{1: 0}, {0: 1}], {1: 0, 0: 1}, True),
+        ([{_Placeholder(): 1}, {0: 0}], {1: 1, 0: 0}, True),
+        ([{_Placeholder(scalar=True): 1}], {0: 1}, False),
+        ([{_Placeholder(scalar=False): 1}], {0: 1}, True),
+    ],
+)
+def test_merge_dictionaries(list_of_dicts, expected_dict, expected_keep):
+    result_dict, result_keep = merge_dictionaries(list_of_dicts)
+    assert result_dict == expected_dict
+    assert result_keep is expected_keep

From f449239b60a88a0928f7074a53048d670d2875de Mon Sep 17 00:00:00 2001
From: Tobias Raabe
Date: Tue, 1 Feb 2022 08:50:42 +0100
Subject: [PATCH 09/20] Add test case.

---
 tests/test_collect.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/test_collect.py b/tests/test_collect.py
index f1c724bc..ea1452bf 100644
--- a/tests/test_collect.py
+++ b/tests/test_collect.py
@@ -140,12 +140,23 @@ def test_collect_files_w_custom_file_name_pattern(
             Path.cwd() / "text.txt",
             id="test with absolute string path",
         ),
+        pytest.param(
+            Session({"check_casing_of_paths": False}, None),
+            Path(),
+            1,
+            does_not_raise(),
+            None,
+            id="test cannot collect node",
+        ),
     ],
 )
 def test_pytask_collect_node(session, path, node, expectation, expected):
     with expectation:
         result = pytask_collect_node(session, path, node)
-        assert str(result.path) == str(expected)
+        if result is None:
+            assert result is expected
+        else:
+            assert str(result.path) == str(expected)
 
 
 @pytest.mark.unit

From ea448f67d69d8a0a616a19094d3fbb79ccd6f4d5 Mon Sep 17 00:00:00 2001
From: Tobias Raabe
Date: Tue, 1 Feb 2022 09:03:08 +0100
Subject: [PATCH 10/20] Add more docs.
--- .../how_to_define_dependencies_products.rst | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/source/tutorials/how_to_define_dependencies_products.rst b/docs/source/tutorials/how_to_define_dependencies_products.rst index 8cdcd9ea..657db504 100644 --- a/docs/source/tutorials/how_to_define_dependencies_products.rst +++ b/docs/source/tutorials/how_to_define_dependencies_products.rst @@ -158,13 +158,13 @@ tasks. Nested dependencies and products -------------------------------- -Dependencies and products are also allowed to be nested containers consisting of tuples, +Dependencies and products are allowed to be nested containers consisting of tuples, lists, and dictionaries. In situations where you want more structure and are otherwise forced to flatten your inputs, this can be beneficial. -Here is an example with a task which fits some model on data. It depends on some model -files which are not actively used, but make sure the task is rerun when the model is -changed. And, it depends on data which is actively used. +Here is an example with a task which fits some model on data. It depends on a module +containing the code for the model which is not actively used, but ensures that the task +is rerun when the model is changed. And, it depends on data. .. code-block:: python @@ -203,6 +203,23 @@ In both cases, ``depends_on`` within the function will be "data": {"a": SRC / "data" / "a.pkl", "b": SRC / "data" / "b.pkl"}, } +Tuples and lists are converted to dictionaries with integer keys. The innermost +decorator is evaluated first. + +.. code-block:: python + + @pytask.mark.depends_on([SRC / "models" / "model.py"]) + @pytask.mark.depends_on([SRC / "data" / "a.pkl", SRC / "data" / "b.pkl"]) + @pytask.mark.produces(BLD / "models" / "fitted_model.pkl") + def task_fit_model(): + ... + +would give + +.. code-block:: python + + {0: SRC / "data" / "a.pkl", 1: SRC / "data" / "b.pkl", 2: SRC / "models" / "model.py"} + .. 
seealso:: The general concept behind nested objects like tuples, lists, and dictionaries is From 86f2246d35d521b876ffaab8417327bafb3c0aeb Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Tue, 1 Feb 2022 12:50:36 +0100 Subject: [PATCH 11/20] TEMP COMMIT USE SPECIAL KEY INSTEAD OF KEEP_DICT. --- src/_pytask/nodes.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index ef88701b..29d463c4 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -179,13 +179,24 @@ def _get_kwargs_from_task_for_function(self) -> dict[str, Any]: for arg_name in ["depends_on", "produces"]: if arg_name in func_arg_names: attribute = getattr(self, arg_name) - kwargs[arg_name] = ( - attribute[0].value - if len(attribute) == 1 - and 0 in attribute - and not self.keep_dict[arg_name] - else tree_map(lambda x: x.value, attribute) - ) + if len(attribute) == 1: + key, node = list(attribute.items())[0] + if isinstance(key, _Placeholder) and key.scalar: + kwargs[arg_name] = node.value + elif isinstance(key, _Placeholder) and not key.scalar: + kwargs[arg_name] = {0: node.value} + else: + kwargs[arg_name] = {key: node.value} + else: + kwargs[arg_name] = tree_map(lambda x: x.value, attribute) + + # kwargs[arg_name] = ( + # attribute[0].value + # if len(attribute) == 1 + # and 0 in attribute + # and not self.keep_dict[arg_name] + # else tree_map(lambda x: x.value, attribute) + # ) return kwargs @@ -377,7 +388,8 @@ def merge_dictionaries( if len(merged_dict) == 1 and isinstance(list(merged_dict)[0], _Placeholder): placeholder, value = list(merged_dict.items())[0] - out = {0: value} + # out = {0: value} + out = merged_dict keep_dict = not placeholder.scalar else: counter = itertools.count() From 7559267612491372bcae006e26e149a27a19797c Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 2 Feb 2022 01:05:39 +0100 Subject: [PATCH 12/20] Revert "TEMP COMMIT USE SPECIAL KEY INSTEAD OF KEEP_DICT." 
This reverts commit 86f2246d35d521b876ffaab8417327bafb3c0aeb. --- src/_pytask/nodes.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 29d463c4..ef88701b 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -179,24 +179,13 @@ def _get_kwargs_from_task_for_function(self) -> dict[str, Any]: for arg_name in ["depends_on", "produces"]: if arg_name in func_arg_names: attribute = getattr(self, arg_name) - if len(attribute) == 1: - key, node = list(attribute.items())[0] - if isinstance(key, _Placeholder) and key.scalar: - kwargs[arg_name] = node.value - elif isinstance(key, _Placeholder) and not key.scalar: - kwargs[arg_name] = {0: node.value} - else: - kwargs[arg_name] = {key: node.value} - else: - kwargs[arg_name] = tree_map(lambda x: x.value, attribute) - - # kwargs[arg_name] = ( - # attribute[0].value - # if len(attribute) == 1 - # and 0 in attribute - # and not self.keep_dict[arg_name] - # else tree_map(lambda x: x.value, attribute) - # ) + kwargs[arg_name] = ( + attribute[0].value + if len(attribute) == 1 + and 0 in attribute + and not self.keep_dict[arg_name] + else tree_map(lambda x: x.value, attribute) + ) return kwargs @@ -388,8 +377,7 @@ def merge_dictionaries( if len(merged_dict) == 1 and isinstance(list(merged_dict)[0], _Placeholder): placeholder, value = list(merged_dict.items())[0] - # out = {0: value} - out = merged_dict + out = {0: value} keep_dict = not placeholder.scalar else: counter = itertools.count() From abaddf70b2be117d28c4d51fa30ffe3f935bc3bc Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 2 Feb 2022 01:44:23 +0100 Subject: [PATCH 13/20] Recategorize tests. 
--- tests/test_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_task.py b/tests/test_task.py index 77241c2a..4bfffde4 100644 --- a/tests/test_task.py +++ b/tests/test_task.py @@ -8,7 +8,7 @@ from pytask import main -@pytest.mark.unit +@pytest.mark.end_to_end @pytest.mark.parametrize("func_name", ["task_example", "func"]) @pytest.mark.parametrize("task_name", ["the_only_task", None]) def test_task_with_task_decorator(tmp_path, func_name, task_name): @@ -37,7 +37,7 @@ def {func_name}(produces): ) -@pytest.mark.unit +@pytest.mark.end_to_end @pytest.mark.parametrize("func_name", ["task_example", "func"]) @pytest.mark.parametrize("task_name", ["the_only_task", None]) def test_task_with_task_decorator_with_parametrize(tmp_path, func_name, task_name): From 7479d8e827a8e074166f0602072f41409bfc0e44 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 2 Feb 2022 01:45:01 +0100 Subject: [PATCH 14/20] Use pybaum throughout pytask. --- src/_pytask/clean.py | 3 ++- src/_pytask/collect_command.py | 18 +++++++------- src/_pytask/nodes.py | 44 +++++++++++----------------------- tests/test_nodes.py | 41 ++++++++++++++----------------- 4 files changed, 43 insertions(+), 63 deletions(-) diff --git a/src/_pytask/clean.py b/src/_pytask/clean.py index 0e7afa8c..2ac62c03 100644 --- a/src/_pytask/clean.py +++ b/src/_pytask/clean.py @@ -25,6 +25,7 @@ from _pytask.session import Session from _pytask.shared import get_first_non_none_value from _pytask.traceback import render_exc_info +from pybaum.tree_util import tree_just_flatten if TYPE_CHECKING: @@ -189,7 +190,7 @@ def _yield_paths_from_task(task: MetaTask) -> Generator[Path, None, None]: """Yield all paths attached to a task.""" yield task.path for attribute in ["depends_on", "produces"]: - for node in getattr(task, attribute).values(): + for node in tree_just_flatten(getattr(task, attribute)): if hasattr(node, "path") and isinstance(node.path, Path): yield node.path diff --git 
a/src/_pytask/collect_command.py b/src/_pytask/collect_command.py index 9c4aa22e..56fbe978 100644 --- a/src/_pytask/collect_command.py +++ b/src/_pytask/collect_command.py @@ -24,6 +24,7 @@ from _pytask.path import relative_to from _pytask.pluginmanager import get_plugin_manager from _pytask.session import Session +from pybaum.tree_util import tree_just_flatten from rich.text import Text from rich.tree import Tree @@ -124,13 +125,8 @@ def _find_common_ancestor_of_all_nodes( for task in tasks: all_paths.append(task.path) if show_nodes: - all_paths.extend( - [ - node.path - for attr in ("depends_on", "produces") - for node in getattr(task, attr).values() - ] - ) + all_paths.extend(map(lambda x: x.path, tree_just_flatten(task.depends_on))) + all_paths.extend(map(lambda x: x.path, tree_just_flatten(task.produces))) common_ancestor = find_common_ancestor(*all_paths, *paths) @@ -200,7 +196,9 @@ def _print_collected_tasks( ) if show_nodes: - for node in sorted(task.depends_on.values(), key=lambda x: x.path): + for node in sorted( + tree_just_flatten(task.depends_on), key=lambda x: x.path + ): reduced_node_name = relative_to(node.path, common_ancestor) url_style = create_url_style_for_path(node.path, editor_url_scheme) task_branch.add( @@ -212,7 +210,9 @@ def _print_collected_tasks( ) ) - for node in sorted(task.produces.values(), key=lambda x: x.path): + for node in sorted( + tree_just_flatten(task.produces), key=lambda x: x.path + ): reduced_node_name = relative_to(node.path, common_ancestor) url_style = create_url_style_for_path(node.path, editor_url_scheme) task_branch.add( diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index ef88701b..49eef1b0 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -118,8 +118,6 @@ class PythonFunctionTask(MetaTask): """Dict[str, MetaNode]: A list of products of task.""" markers = attr.ib(factory=list, type="List[Mark]") """Optional[List[Mark]]: A list of markers attached to the task function.""" - keep_dict = 
attr.ib(factory=dict, type=Dict[str, bool]) - """Dict[str, bool]: Should dictionaries for single nodes be preserved?""" _report_sections = attr.ib(factory=list, type=List[Tuple[str, str, str]]) """List[Tuple[str, str, str]]: Reports with entries for when, what, and content.""" attributes = attr.ib(factory=dict, type=Dict[Any, Any]) @@ -134,16 +132,12 @@ def from_path_name_function_session( cls, path: Path, name: str, function: Callable[..., Any], session: Session ) -> PythonFunctionTask: """Create a task from a path, name, function, and session.""" - keep_dictionary = {} - objects = _extract_nodes_from_function_markers(function, depends_on) - nodes, keep_dict_de = _convert_objects_to_node_dictionary(objects, "depends_on") - keep_dictionary["depends_on"] = keep_dict_de + nodes = _convert_objects_to_node_dictionary(objects, "depends_on") dependencies = tree_map(lambda x: _collect_node(session, path, name, x), nodes) objects = _extract_nodes_from_function_markers(function, produces) - nodes, keep_dict_prod = _convert_objects_to_node_dictionary(objects, "produces") - keep_dictionary["produces"] = keep_dict_prod + nodes = _convert_objects_to_node_dictionary(objects, "produces") products = tree_map(lambda x: _collect_node(session, path, name, x), nodes) markers = [ @@ -160,7 +154,6 @@ def from_path_name_function_session( depends_on=dependencies, produces=products, markers=markers, - keep_dict=keep_dictionary, ) def execute(self) -> None: @@ -179,13 +172,7 @@ def _get_kwargs_from_task_for_function(self) -> dict[str, Any]: for arg_name in ["depends_on", "produces"]: if arg_name in func_arg_names: attribute = getattr(self, arg_name) - kwargs[arg_name] = ( - attribute[0].value - if len(attribute) == 1 - and 0 in attribute - and not self.keep_dict[arg_name] - else tree_map(lambda x: x.value, attribute) - ) + kwargs[arg_name] = tree_map(lambda x: x.value, attribute) return kwargs @@ -282,14 +269,12 @@ def _extract_nodes_from_function_markers( yield parsed -def 
_convert_objects_to_node_dictionary( - objects: Any, when: str -) -> tuple[dict[Any, Any], bool]: +def _convert_objects_to_node_dictionary(objects: Any, when: str) -> dict[Any, Any]: """Convert objects to node dictionary.""" list_of_dicts = [convert_to_dict(x) for x in objects] _check_that_names_are_not_used_multiple_times(list_of_dicts, when) - nodes, keep_dict = merge_dictionaries(list_of_dicts) - return nodes, keep_dict + nodes = merge_dictionaries(list_of_dicts) + return nodes @attr.s(frozen=True) @@ -353,9 +338,7 @@ def union_of_dictionaries(dicts: list[dict[Any, Any]]) -> dict[Any, Any]: return dict(itertools.chain.from_iterable(dict_.items() for dict_ in dicts)) -def merge_dictionaries( - list_of_dicts: list[dict[Any, Any]] -) -> tuple[dict[Any, Any], bool]: +def merge_dictionaries(list_of_dicts: list[dict[Any, Any]]) -> dict[Any, Any]: """Merge multiple dictionaries. The function does not perform a deep merge. It simply merges the dictionary based on @@ -366,19 +349,21 @@ def merge_dictionaries( -------- >>> a, b = {"a": 0}, {"b": 1} >>> merge_dictionaries([a, b]) - ({'a': 0, 'b': 1}, True) + {'a': 0, 'b': 1} >>> a, b = {_Placeholder(): 0}, {_Placeholder(): 1} >>> merge_dictionaries([a, b]) - ({0: 0, 1: 1}, True) + {0: 0, 1: 1} """ merged_dict = union_of_dictionaries(list_of_dicts) if len(merged_dict) == 1 and isinstance(list(merged_dict)[0], _Placeholder): placeholder, value = list(merged_dict.items())[0] - out = {0: value} - keep_dict = not placeholder.scalar + if placeholder.scalar: + out = value + else: + out = {0: value} else: counter = itertools.count() out = {} @@ -391,9 +376,8 @@ def merge_dictionaries( break else: out[k] = v - keep_dict = True - return out, keep_dict + return out def create_task_name(path: Path, base_name: str) -> str: diff --git a/tests/test_nodes.py b/tests/test_nodes.py index a465f0ba..5e979d16 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -189,34 +189,30 @@ def test_reduce_node_name(node, paths, expectation, 
expected): @pytest.mark.integration @pytest.mark.parametrize("when", ["depends_on", "produces"]) @pytest.mark.parametrize( - "objects, expectation, expected_dict, expected_kd", + "objects, expectation, expected", [ - ([0, 1], does_not_raise, {0: 0, 1: 1}, True), - ([{0: 0}, {1: 1}], does_not_raise, {0: 0, 1: 1}, True), - ([{0: 0}], does_not_raise, {0: 0}, True), - ([[0]], does_not_raise, {0: 0}, True), + ([0, 1], does_not_raise, {0: 0, 1: 1}), + ([{0: 0}, {1: 1}], does_not_raise, {0: 0, 1: 1}), + ([{0: 0}], does_not_raise, {0: 0}), + ([[0]], does_not_raise, {0: 0}), ( [((0, 0),), ((0, 1),)], does_not_raise, {0: {0: 0, 1: 0}, 1: {0: 0, 1: 1}}, - True, ), - ([{0: {0: {0: 0}}}, [2]], does_not_raise, {0: {0: {0: 0}}, 1: 2}, True), - ([{0: 0}, {0: 1}], ValueError, None, None), + ([{0: {0: {0: 0}}}, [2]], does_not_raise, {0: {0: {0: 0}}, 1: 2}), + ([{0: 0}, {0: 1}], ValueError, None), ], ) -def test_convert_objects_to_node_dictionary( - objects, when, expectation, expected_dict, expected_kd -): +def test_convert_objects_to_node_dictionary(objects, when, expectation, expected): expectation = ( pytest.raises(expectation, match=f"'@pytask.mark.{when}' has nodes") if expectation == ValueError else expectation() ) with expectation: - node_dict, keep_dict = _convert_objects_to_node_dictionary(objects, when) - assert node_dict == expected_dict - assert keep_dict is expected_kd + nodes = _convert_objects_to_node_dictionary(objects, when) + assert nodes == expected def _convert_placeholders_to_tuples(x): @@ -251,15 +247,14 @@ def test_convert_to_dict(x, first_level, expected): @pytest.mark.unit @pytest.mark.parametrize( - "list_of_dicts, expected_dict, expected_keep", + "list_of_dicts, expected", [ - ([{1: 0}, {0: 1}], {1: 0, 0: 1}, True), - ([{_Placeholder(): 1}, {0: 0}], {1: 1, 0: 0}, True), - ([{_Placeholder(scalar=True): 1}], {0: 1}, False), - ([{_Placeholder(scalar=False): 1}], {0: 1}, True), + ([{1: 0}, {0: 1}], {1: 0, 0: 1}), + ([{_Placeholder(): 1}, {0: 0}], {1: 1, 0: 
0}), + ([{_Placeholder(scalar=True): 1}], 1), + ([{_Placeholder(scalar=False): 1}], {0: 1}), ], ) -def test_merge_dictionaries(list_of_dicts, expected_dict, expected_keep): - result_dict, result_keep = merge_dictionaries(list_of_dicts) - assert result_dict == expected_dict - assert result_keep is expected_keep +def test_merge_dictionaries(list_of_dicts, expected): + result = merge_dictionaries(list_of_dicts) + assert result == expected From d7f1f9bdcc6e49983acc16aa9496546d08784c03 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 2 Feb 2022 01:56:17 +0100 Subject: [PATCH 15/20] Test profile with complex deps. --- src/_pytask/clean.py | 2 +- tests/test_pybaum.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/_pytask/clean.py b/src/_pytask/clean.py index 2ac62c03..e4626b5b 100644 --- a/src/_pytask/clean.py +++ b/src/_pytask/clean.py @@ -234,7 +234,7 @@ class _RecursivePathNode: """ path = attr.ib(type=Path) - sub_nodes = attr.ib(type="list[_RecursivePathNode]") + sub_nodes = attr.ib(type=list["_RecursivePathNode"]) is_dir = attr.ib(type=bool) is_file = attr.ib(type=bool) is_unknown = attr.ib(type=bool) diff --git a/tests/test_pybaum.py b/tests/test_pybaum.py index e90e1f77..5a3eead6 100644 --- a/tests/test_pybaum.py +++ b/tests/test_pybaum.py @@ -4,7 +4,9 @@ import textwrap import pytest +from _pytask.outcomes import ExitCode from pybaum import tree_map +from pytask import cli from pytask import main @@ -45,3 +47,30 @@ def task_example(): 3: {"a": tmp_path / "dict_out.txt", "b": {"c": tmp_path / "dict_out_2.txt"}}, } assert products == expected + + +@pytest.mark.end_to_end +def test_profile_with_pybaum(tmp_path, runner): + source = """ + import time + import pytask + from pybaum.tree_util import tree_just_flatten + + @pytask.mark.produces([{"out_1": "out_1.txt"}, {"out_2": "out_2.txt"}]) + def task_example(produces): + time.sleep(2) + for p in tree_just_flatten(produces): + p.write_text("There are nine 
billion bicycles in Beijing.") + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + + result = runner.invoke(cli, [tmp_path.as_posix()]) + + result = runner.invoke(cli, ["profile", tmp_path.as_posix()]) + + assert result.exit_code == ExitCode.OK + assert "Collected 1 task." in result.output + assert "Duration (in s)" in result.output + assert "0." in result.output + assert "Size of Products" in result.output + assert "86 bytes" in result.output From 3abab34a872a81a82961eaf11e5223ce6e5a73c9 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 2 Feb 2022 02:04:51 +0100 Subject: [PATCH 16/20] Categorize tests. --- tests/test_profile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_profile.py b/tests/test_profile.py index ff7a6efb..d076bb21 100644 --- a/tests/test_profile.py +++ b/tests/test_profile.py @@ -109,6 +109,7 @@ def task_example(): time.sleep(2) assert tmp_path.joinpath(f"profile.{export}").exists() +@pytest.mark.unit @pytest.mark.parametrize( "bytes_, units, expected", [ From 923dde55657a46d746e3191fce6a268404a16f7d Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 2 Feb 2022 16:47:18 +0100 Subject: [PATCH 17/20] Fix type issue. 
--- src/_pytask/clean.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/_pytask/clean.py b/src/_pytask/clean.py index e4626b5b..cd2f19c9 100644 --- a/src/_pytask/clean.py +++ b/src/_pytask/clean.py @@ -9,6 +9,7 @@ from typing import Any from typing import Generator from typing import Iterable +from typing import List from typing import TYPE_CHECKING import attr @@ -234,7 +235,7 @@ class _RecursivePathNode: """ path = attr.ib(type=Path) - sub_nodes = attr.ib(type=list["_RecursivePathNode"]) + sub_nodes = attr.ib(type=List["_RecursivePathNode"]) is_dir = attr.ib(type=bool) is_file = attr.ib(type=bool) is_unknown = attr.ib(type=bool) From 165bebf12b9d646a5d1d7a3e602838697b13b578 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 5 Feb 2022 15:55:00 +0100 Subject: [PATCH 18/20] Add some tests. --- tests/test_pybaum.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pybaum.py b/tests/test_pybaum.py index 5a3eead6..052e7591 100644 --- a/tests/test_pybaum.py +++ b/tests/test_pybaum.py @@ -65,6 +65,7 @@ def task_example(produces): tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.OK result = runner.invoke(cli, ["profile", tmp_path.as_posix()]) From c58582299f08cca72e87e77cb29f9dd07ec9ba01 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sat, 19 Feb 2022 18:20:08 +0100 Subject: [PATCH 19/20] Use tree_just_yield. 
--- src/_pytask/clean.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/_pytask/clean.py b/src/_pytask/clean.py index 998407f3..24580d28 100644 --- a/src/_pytask/clean.py +++ b/src/_pytask/clean.py @@ -27,7 +27,7 @@ from _pytask.session import Session from _pytask.shared import get_first_non_none_value from _pytask.traceback import render_exc_info -from pybaum.tree_util import tree_just_flatten +from pybaum.tree_util import tree_just_yield if TYPE_CHECKING: @@ -192,7 +192,7 @@ def _yield_paths_from_task(task: MetaTask) -> Generator[Path, None, None]: """Yield all paths attached to a task.""" yield task.path for attribute in ["depends_on", "produces"]: - for node in tree_just_flatten(getattr(task, attribute)): + for node in tree_just_yield(getattr(task, attribute)): if hasattr(node, "path") and isinstance(node.path, Path): yield node.path From 35306411b767f5a1be170e9c6664be3b1ed0d16f Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 23 Feb 2022 22:52:16 +0100 Subject: [PATCH 20/20] Fix release notes. --- docs/source/changes.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/changes.rst b/docs/source/changes.rst index dc3d9d10..89a49bdf 100644 --- a/docs/source/changes.rst +++ b/docs/source/changes.rst @@ -7,7 +7,7 @@ all releases are available on `PyPI `_ and `Anaconda.org `_. -0.1.9 - 2022-xx-xx +0.1.9 - 2022-02-23 ------------------ - :pull:`197` publishes types, and adds classes and functions to the main namespace. @@ -17,6 +17,9 @@ all releases are available on `PyPI `_ and - :pull:`219` removes some leftovers from pytest in :class:`~_pytask.mark.Mark`. - :pull:`221` adds more test cases for parametrizations. - :pull:`222` adds an automated Github Actions job for creating a list pytask plugins. +- :pull:`225` fixes a circular import noticeable in plugins created by :pull:`197`. +- :pull:`226` fixes a bug where the number of items in the live table during the + execution is not exhausted. 
(Closes :issue:`223`.) 0.1.8 - 2022-02-07