From e8e602d2d14a26c4aabeda0672d8993c18e50498 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 18:51:12 +0100 Subject: [PATCH 1/9] Simplify the teardown of a task. --- docs/source/changes.md | 1 + src/_pytask/execute.py | 15 ++++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/docs/source/changes.md b/docs/source/changes.md index 20753f1e..da1c05b7 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -30,6 +30,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and It is delegated to the check during the execution. - {pull}`481` improves coverage. - {pull}`482` correctly handles names and signatures of {class}`~pytask.PythonNode`. +- {pull}`483` simplifies the teardown of a task. ## 0.4.1 - 2023-10-11 diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index b9fd8319..6da40172 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -196,19 +196,12 @@ def pytask_execute_task(session: Session, task: PTask) -> bool: @hookimpl def pytask_execute_task_teardown(session: Session, task: PTask) -> None: """Check if :class:`_pytask.nodes.PathNode` are produced by a task.""" - missing_nodes = [] - for product in session.dag.successors(task.signature): - node = session.dag.nodes[product]["node"] - if not node.state(): - missing_nodes.append(node) - + missing_nodes = [node for node in tree_leaves(task.produces) if not node.state()] # type: ignore[attr-defined] if missing_nodes: - paths = [ - format_node_name(i, session.config["paths"]).plain for i in missing_nodes - ] + paths = session.config["paths"] + files = [format_node_name(i, paths).plain for i in missing_nodes] # type: ignore[arg-type] formatted = format_strings_as_flat_tree( - paths, - "The task did not produce the following files:\n", + files, "The task did not produce the following files:\n" ) raise NodeNotFoundError(formatted) From 9bf4b1cf7b184e11df457a8b1940e994a2a611de Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 19:10:44 +0100 Subject: [PATCH 2/9] Raise informative error when path nodes point to directories. --- src/_pytask/execute.py | 25 +++++++++++++++++-------- src/_pytask/nodes.py | 6 ++++++ tests/test_execute.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index 6da40172..06116f51 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -126,14 +126,23 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: """ for dependency in session.dag.predecessors(task.signature): node = session.dag.nodes[dependency]["node"] - if not node.state(): - msg = f"{task.name} requires missing node {node.name}." - if IS_FILE_SYSTEM_CASE_SENSITIVE: - msg += ( - "\n\n(Hint: Your file-system is case-sensitive. Check the paths' " - "capitalization carefully.)" - ) - raise NodeNotFoundError(msg) + try: + state = node.state() + except Exception as e: # noqa: BLE001 + msg = ( + f"{task.name!r} requires node {node.name!r}, but it raised an " + "exception when requesting its state." + ) + raise NodeNotFoundError(msg) from e + else: + if not state: + msg = f"{task.name!r} requires missing node {node.name!r}." + if IS_FILE_SYSTEM_CASE_SENSITIVE: + msg += ( + "\n\n(Hint: Your file-system is case-sensitive. Check the " + "paths' capitalization carefully.)" + ) + raise NodeNotFoundError(msg) # Create directory for product if it does not exist. Maybe this should be a `setup` # method for the node classes. diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index a296fd47..1ffedf4b 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -177,6 +177,9 @@ def state(self) -> str | None: The state is given by the modification timestamp. """ + if self.path.is_dir(): + msg = f"The path should be a file and not a directory: {self.path}." + raise ValueError(msg) if self.path.exists(): modification_time = self.path.stat().st_mtime return hash_path(self.path, modification_time) @@ -316,6 +319,9 @@ def from_path(cls, path: Path) -> PickleNode: return cls(name=path.as_posix(), path=path) def state(self) -> str | None: + if self.path.is_dir(): + msg = f"The path should be a file and not a directory: {self.path}." + raise ValueError(msg) if self.path.exists(): modification_time = self.path.stat().st_mtime return hash_path(self.path, modification_time) diff --git a/tests/test_execute.py b/tests/test_execute.py index a23a78a0..40379044 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -1061,3 +1061,36 @@ def task_example(a = PythonNode(value={"a": 1}, hash=True)): result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.FAILED assert "TypeError: unhashable type: 'dict'" in result.output + + +@pytest.mark.parametrize( + "node", ["Path(__file__).parent", "PickleNode(name='', path=Path(__file__).parent)"] +) +def test_error_when_path_dependency_is_directory(runner, tmp_path, node): + source = f""" + from pathlib import Path + from pytask import PickleNode + + def task_example(path = {node}): ... + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.FAILED + assert "The path should be a file and not a directory" in result.output + + +@pytest.mark.parametrize( + "node", ["Path(__file__).parent", "PickleNode(name='', path=Path(__file__).parent)"] +) +def test_error_when_path_product_is_directory(runner, tmp_path, node): + source = f""" + from pathlib import Path + from pytask import PickleNode, Product + from typing_extensions import Annotated + + def task_example(path: Annotated[..., Product] = {node}): ... + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.FAILED + assert "The path should be a file and not a directory" in result.output From d218fe5212ddffed723003533c77bb85989bcbc2 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Wed, 8 Nov 2023 23:31:54 +0100 Subject: [PATCH 3/9] Fix errors. --- tests/test_execute.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_execute.py b/tests/test_execute.py index 40379044..e010c6a2 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -1004,7 +1004,7 @@ def task_d(produces): result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.FAILED - assert "NodeNotFoundError: task_d.py::task_d requires" in result.output + assert "NodeNotFoundError: 'task_d.py::task_d' requires" in result.output @pytest.mark.end_to_end() @@ -1024,7 +1024,7 @@ def task_e(in1_: Annotated[Path, node1], in2_: Annotated[Any, node2]): ... result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.FAILED - assert "NodeNotFoundError: task_e.py::task_e requires" in result.output + assert "NodeNotFoundError: 'task_e.py::task_e' requires" in result.output assert "input1" in result.output @@ -1045,7 +1045,7 @@ def task_d(produces): result = runner.invoke(cli, [tmp_path.joinpath("src").as_posix()]) assert result.exit_code == ExitCode.FAILED - assert "NodeNotFoundError: task_d.py::task_d requires" in result.output + assert "NodeNotFoundError: 'task_d.py::task_d' requires" in result.output assert "bld/in.txt" in result.output From 13ad827305060b06f5467a8e2eab179007af569b Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 9 Nov 2023 00:45:19 +0100 Subject: [PATCH 4/9] fix ellipsis. --- tests/test_execute.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_execute.py b/tests/test_execute.py index e010c6a2..bf831d5a 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -1087,8 +1087,9 @@ def test_error_when_path_product_is_directory(runner, tmp_path, node): from pathlib import Path from pytask import PickleNode, Product from typing_extensions import Annotated + from typing import Any - def task_example(path: Annotated[..., Product] = {node}): ... + def task_example(path: Annotated[Any, Product] = {node}): ... """ tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) From 4b8b8ab2ed35f757ea148e63df2020eeed39dbd3 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 9 Nov 2023 00:46:46 +0100 Subject: [PATCH 5/9] To changes. --- docs/source/changes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/changes.md b/docs/source/changes.md index bcfd3d7d..ecbe2459 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -8,6 +8,8 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and ## 0.4.3 - 2023-11-xx - {pull}`483` simplifies the teardown of a task. +- {pull}`484` raises more informative error when directories instead of files are used + with path nodes. - {pull}`485` adds missing steps to unconfigure pytask after the job is done which caused flaky tests. From 17e815a6eeccc6efb4ae1bd95f022ee43db592d6 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 9 Nov 2023 09:39:05 +0100 Subject: [PATCH 6/9] Move to check to collection. --- src/_pytask/collect.py | 11 +++++++++++ src/_pytask/nodes.py | 6 ------ tests/test_collect.py | 44 ++++++++++++++++++++++++++++++++++++++++++ tests/test_execute.py | 34 -------------------------------- 4 files changed, 55 insertions(+), 40 deletions(-) diff --git a/src/_pytask/collect.py b/src/_pytask/collect.py index 419bf83e..be9d4d3d 100644 --- a/src/_pytask/collect.py +++ b/src/_pytask/collect.py @@ -303,6 +303,10 @@ def pytask_collect_task( """ +_TEMPLATE_ERROR_DIRECTORY: str = """\ +The path '{path}' points to a directory, although only files are allowed.""" + + @hookimpl(trylast=True) def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PNode: """Collect a node of a task as a :class:`pytask.PNode`. @@ -348,6 +352,9 @@ def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PN node.path, session.config["paths"] or (session.config["root"],) ) + if isinstance(node, PPathNode) and node.path.is_dir(): + raise ValueError(_TEMPLATE_ERROR_DIRECTORY.format(path=node.path)) + if isinstance(node, PNode): return node @@ -362,6 +369,10 @@ def pytask_collect_node(session: Session, path: Path, node_info: NodeInfo) -> PN node, session.config["check_casing_of_paths"] ) name = shorten_path(node, session.config["paths"] or (session.config["root"],)) + + if isinstance(node, Path) and node.is_dir(): + raise ValueError(_TEMPLATE_ERROR_DIRECTORY.format(path=path)) + return PathNode(name=name, path=node) node_name = create_name_of_python_node(node_info) diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py index 1ffedf4b..a296fd47 100644 --- a/src/_pytask/nodes.py +++ b/src/_pytask/nodes.py @@ -177,9 +177,6 @@ def state(self) -> str | None: The state is given by the modification timestamp. """ - if self.path.is_dir(): - msg = f"The path should be a file and not a directory: {self.path}." - raise ValueError(msg) if self.path.exists(): modification_time = self.path.stat().st_mtime return hash_path(self.path, modification_time) @@ -319,9 +316,6 @@ def from_path(cls, path: Path) -> PickleNode: return cls(name=path.as_posix(), path=path) def state(self) -> str | None: - if self.path.is_dir(): - msg = f"The path should be a file and not a directory: {self.path}." - raise ValueError(msg) if self.path.exists(): modification_time = self.path.stat().st_mtime return hash_path(self.path, modification_time) diff --git a/tests/test_collect.py b/tests/test_collect.py index f811c867..314d2faa 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -572,3 +572,47 @@ def task_example(path: Annotated[Path, Path("file.txt"), Product]) -> None: ... result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.COLLECTION_FAILED assert "is defined twice" in result.output + + +@pytest.mark.parametrize( + "node", + [ + "Path(__file__).parent", + "PathNode(name='path', path=Path(__file__).parent)", + "PickleNode(name='', path=Path(__file__).parent)", + ], +) +def test_error_when_path_dependency_is_directory(runner, tmp_path, node): + source = f""" + from pathlib import Path + from pytask import PickleNode, PathNode + + def task_example(path = {node}): ... + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.COLLECTION_FAILED + assert "only files are allowed" in result.output + + +@pytest.mark.parametrize( + "node", + [ + "Path(__file__).parent", + "PathNode(name='path', path=Path(__file__).parent)", + "PickleNode(name='', path=Path(__file__).parent)", + ], +) +def test_error_when_path_product_is_directory(runner, tmp_path, node): + source = f""" + from pathlib import Path + from pytask import PickleNode, Product, PathNode + from typing_extensions import Annotated + from typing import Any + + def task_example(path: Annotated[Any, Product] = {node}): ... + """ + tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) + result = runner.invoke(cli, [tmp_path.as_posix()]) + assert result.exit_code == ExitCode.COLLECTION_FAILED + assert "only files are allowed" in result.output diff --git a/tests/test_execute.py b/tests/test_execute.py index bf831d5a..8dcf6dab 100644 --- a/tests/test_execute.py +++ b/tests/test_execute.py @@ -1061,37 +1061,3 @@ def task_example(a = PythonNode(value={"a": 1}, hash=True)): result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.FAILED assert "TypeError: unhashable type: 'dict'" in result.output - - -@pytest.mark.parametrize( - "node", ["Path(__file__).parent", "PickleNode(name='', path=Path(__file__).parent)"] -) -def test_error_when_path_dependency_is_directory(runner, tmp_path, node): - source = f""" - from pathlib import Path - from pytask import PickleNode - - def task_example(path = {node}): ... - """ - tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.FAILED - assert "The path should be a file and not a directory" in result.output - - -@pytest.mark.parametrize( - "node", ["Path(__file__).parent", "PickleNode(name='', path=Path(__file__).parent)"] -) -def test_error_when_path_product_is_directory(runner, tmp_path, node): - source = f""" - from pathlib import Path - from pytask import PickleNode, Product - from typing_extensions import Annotated - from typing import Any - - def task_example(path: Annotated[Any, Product] = {node}): ... - """ - tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) - result = runner.invoke(cli, [tmp_path.as_posix()]) - assert result.exit_code == ExitCode.FAILED - assert "The path should be a file and not a directory" in result.output From 76750276cb94d8bc9f57cb5a57d021c54ba23a9e Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 9 Nov 2023 09:40:35 +0100 Subject: [PATCH 7/9] Fix. --- src/_pytask/execute.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index 06116f51..6da40172 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -126,23 +126,14 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: """ for dependency in session.dag.predecessors(task.signature): node = session.dag.nodes[dependency]["node"] - try: - state = node.state() - except Exception as e: # noqa: BLE001 - msg = ( - f"{task.name!r} requires node {node.name!r}, but it raised an " - "exception when requesting its state." - ) - raise NodeNotFoundError(msg) from e - else: - if not state: - msg = f"{task.name!r} requires missing node {node.name!r}." - if IS_FILE_SYSTEM_CASE_SENSITIVE: - msg += ( - "\n\n(Hint: Your file-system is case-sensitive. Check the " - "paths' capitalization carefully.)" - ) - raise NodeNotFoundError(msg) + if not node.state(): + msg = f"{task.name} requires missing node {node.name}." + if IS_FILE_SYSTEM_CASE_SENSITIVE: + msg += ( + "\n\n(Hint: Your file-system is case-sensitive. Check the paths' " + "capitalization carefully.)" + ) + raise NodeNotFoundError(msg) # Create directory for product if it does not exist. Maybe this should be a `setup` # method for the node classes. From 48ba5c607e20aa38baea927934af37617f7a5bc4 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 9 Nov 2023 09:46:32 +0100 Subject: [PATCH 8/9] Fix. --- src/_pytask/execute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/_pytask/execute.py b/src/_pytask/execute.py index 6da40172..c6641710 100644 --- a/src/_pytask/execute.py +++ b/src/_pytask/execute.py @@ -127,7 +127,7 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None: for dependency in session.dag.predecessors(task.signature): node = session.dag.nodes[dependency]["node"] if not node.state(): - msg = f"{task.name} requires missing node {node.name}." + msg = f"{task.name!r} requires missing node {node.name!r}." if IS_FILE_SYSTEM_CASE_SENSITIVE: msg += ( "\n\n(Hint: Your file-system is case-sensitive. Check the paths' " From a16cd465783a0a3673d5077758de5d9cb10eb336 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Thu, 9 Nov 2023 09:52:30 +0100 Subject: [PATCH 9/9] fix. --- tests/test_collect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_collect.py b/tests/test_collect.py index 314d2faa..c10b493d 100644 --- a/tests/test_collect.py +++ b/tests/test_collect.py @@ -592,7 +592,7 @@ def task_example(path = {node}): ... tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.COLLECTION_FAILED - assert "only files are allowed" in result.output + assert all(i in result.output for i in ("only", "files", "are", "allowed")) @pytest.mark.parametrize( @@ -615,4 +615,4 @@ def task_example(path: Annotated[Any, Product] = {node}): ... tmp_path.joinpath("task_example.py").write_text(textwrap.dedent(source)) result = runner.invoke(cli, [tmp_path.as_posix()]) assert result.exit_code == ExitCode.COLLECTION_FAILED - assert "only files are allowed" in result.output + assert all(i in result.output for i in ("only", "files", "are", "allowed"))