Skip to content

Commit d266a68

Browse files
authored
Merge 3f6d04d into 580f415
2 parents 580f415 + 3f6d04d commit d266a68

File tree

14 files changed

+135
-173
lines changed

14 files changed

+135
-173
lines changed

docs/source/changes.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
1818
when a product annotation is used with the argument name `produces`. And, allow
1919
`produces` to intake any node.
2020
- {pull}`490` refactors and better tests parsing of dependencies.
21+
- {pull}`496` makes pytask even lazier. Now, when a task produces a node whose hash
22+
remains the same, the subsequent tasks are not executed. The old behavior dated from when pytask
23+
relied on timestamps.
2124

22-
## 0.4.2 - 2023-11-8
25+
## 0.4.2 - 2023-11-08
2326

2427
- {pull}`449` simplifies the code building the plugin manager.
2528
- {pull}`451` improves `collect_command.py` and renames `graph.py` to `dag_command.py`.

docs/source/reference_guides/hookspecs.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ your plugin.
7777
```{eval-rst}
7878
.. autofunction:: pytask_dag
7979
.. autofunction:: pytask_dag_create_dag
80-
.. autofunction:: pytask_dag_select_execution_dag
8180
.. autofunction:: pytask_dag_log
8281
8382
```

src/_pytask/collect.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from _pytask.console import is_jupyter
2424
from _pytask.exceptions import CollectionError
2525
from _pytask.mark import MarkGenerator
26+
from _pytask.mark_utils import get_all_marks
2627
from _pytask.mark_utils import has_mark
2728
from _pytask.node_protocols import PNode
2829
from _pytask.node_protocols import PPathNode
@@ -246,6 +247,13 @@ def pytask_collect_task(
246247
247248
"""
248249
if (name.startswith("task_") or has_mark(obj, "task")) and is_task_function(obj):
250+
if has_mark(obj, "try_first") and has_mark(obj, "try_last"):
251+
msg = (
252+
"The task cannot have mixed priorities. Do not apply "
253+
"'@pytask.mark.try_first' and '@pytask.mark.try_last' at the same time."
254+
)
255+
raise ValueError(msg)
256+
249257
path_nodes = Path.cwd() if path is None else path.parent
250258
dependencies = parse_dependencies_from_task_function(
251259
session, path, name, path_nodes, obj
@@ -254,7 +262,7 @@ def pytask_collect_task(
254262
session, path, name, path_nodes, obj
255263
)
256264

257-
markers = obj.pytask_meta.markers if hasattr(obj, "pytask_meta") else []
265+
markers = get_all_marks(obj)
258266

259267
# Get the underlying function to avoid having different states of the function,
260268
# e.g. due to pytask_meta, in different layers of the wrapping.

src/_pytask/dag.py

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,7 @@
1414
from _pytask.console import format_task_name
1515
from _pytask.console import render_to_string
1616
from _pytask.console import TASK_ICON
17-
from _pytask.dag_utils import node_and_neighbors
18-
from _pytask.dag_utils import task_and_descending_tasks
19-
from _pytask.dag_utils import TopologicalSorter
20-
from _pytask.database_utils import DatabaseSession
21-
from _pytask.database_utils import State
2217
from _pytask.exceptions import ResolvingDependenciesError
23-
from _pytask.mark import Mark
2418
from _pytask.node_protocols import PNode
2519
from _pytask.node_protocols import PTask
2620
from _pytask.nodes import PythonNode
@@ -31,7 +25,6 @@
3125
from rich.tree import Tree
3226

3327
if TYPE_CHECKING:
34-
from _pytask.node_protocols import MetaNode
3528
from pathlib import Path
3629
from _pytask.session import Session
3730

@@ -44,7 +37,6 @@ def pytask_dag(session: Session) -> bool | None:
4437
session=session, tasks=session.tasks
4538
)
4639
session.hook.pytask_dag_modify_dag(session=session, dag=session.dag)
47-
session.hook.pytask_dag_select_execution_dag(session=session, dag=session.dag)
4840

4941
except Exception: # noqa: BLE001
5042
report = DagReport.from_exception(sys.exc_info())
@@ -101,59 +93,6 @@ def _add_product(dag: nx.DiGraph, task: PTask, node: PNode) -> None:
10193
return dag
10294

10395

104-
@hookimpl
105-
def pytask_dag_select_execution_dag(session: Session, dag: nx.DiGraph) -> None:
106-
"""Select the tasks which need to be executed."""
107-
scheduler = TopologicalSorter.from_dag(dag)
108-
visited_nodes: set[str] = set()
109-
110-
while scheduler.is_active():
111-
task_signature = scheduler.get_ready()[0]
112-
if task_signature not in visited_nodes:
113-
task = dag.nodes[task_signature]["task"]
114-
have_changed = _have_task_or_neighbors_changed(session, dag, task)
115-
if have_changed:
116-
visited_nodes.update(task_and_descending_tasks(task_signature, dag))
117-
else:
118-
dag.nodes[task_signature]["task"].markers.append(
119-
Mark("skip_unchanged", (), {})
120-
)
121-
scheduler.done(task_signature)
122-
123-
124-
def _have_task_or_neighbors_changed(
125-
session: Session, dag: nx.DiGraph, task: PTask
126-
) -> bool:
127-
"""Indicate whether dependencies or products of a task have changed."""
128-
return any(
129-
session.hook.pytask_dag_has_node_changed(
130-
session=session,
131-
dag=dag,
132-
task=task,
133-
node=dag.nodes[node_name].get("task") or dag.nodes[node_name].get("node"),
134-
)
135-
for node_name in node_and_neighbors(dag, task.signature)
136-
)
137-
138-
139-
@hookimpl(trylast=True)
140-
def pytask_dag_has_node_changed(task: PTask, node: MetaNode) -> bool:
141-
"""Indicate whether a single dependency or product has changed."""
142-
# If node does not exist, we receive None.
143-
node_state = node.state()
144-
if node_state is None:
145-
return True
146-
147-
with DatabaseSession() as session:
148-
db_state = session.get(State, (task.signature, node.signature))
149-
150-
# If the node is not in the database.
151-
if db_state is None:
152-
return True
153-
154-
return node_state != db_state.hash_
155-
156-
15796
def _check_if_dag_has_cycles(dag: nx.DiGraph) -> None:
15897
"""Check if DAG has cycles."""
15998
try:

src/_pytask/dag_utils.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
from typing import TYPE_CHECKING
88

99
import networkx as nx
10-
from _pytask.console import format_strings_as_flat_tree
11-
from _pytask.console import format_task_name
12-
from _pytask.console import TASK_ICON
1310
from _pytask.mark_utils import has_mark
1411
from attrs import define
1512
from attrs import field
@@ -54,8 +51,11 @@ def node_and_neighbors(dag: nx.DiGraph, node: str) -> Iterable[str]:
5451
We cannot use ``dag.neighbors`` as it only considers successors as neighbors in a
5552
DAG.
5653
54+
The task node needs to be yielded in the middle so that first predecessors are checked
55+
and then the rest of the nodes.
56+
5757
"""
58-
return itertools.chain([node], dag.predecessors(node), dag.successors(node))
58+
return itertools.chain(dag.predecessors(node), [node], dag.successors(node))
5959

6060

6161
@define
@@ -166,25 +166,6 @@ def _extract_priorities_from_tasks(tasks: list[PTask]) -> dict[str, int]:
166166
}
167167
for task in tasks
168168
}
169-
tasks_w_mixed_priorities = [
170-
name for name, p in priorities.items() if p["try_first"] and p["try_last"]
171-
]
172-
173-
if tasks_w_mixed_priorities:
174-
name_to_task = {task.signature: task for task in tasks}
175-
reduced_names = []
176-
for name in tasks_w_mixed_priorities:
177-
reduced_name = format_task_name(name_to_task[name], "no_link")
178-
reduced_names.append(reduced_name.plain)
179-
180-
text = format_strings_as_flat_tree(
181-
reduced_names, "Tasks with mixed priorities", TASK_ICON
182-
)
183-
msg = (
184-
f"'try_first' and 'try_last' cannot be applied on the same task. See the "
185-
f"following tasks for errors:\n\n{text}"
186-
)
187-
raise ValueError(msg)
188169

189170
# Recode to numeric values for sorting.
190171
numeric_mapping = {(True, False): 1, (False, False): 0, (False, True): -1}

src/_pytask/database_utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from sqlalchemy.orm import sessionmaker
1212

1313
if TYPE_CHECKING:
14+
from _pytask.node_protocols import MetaNode
15+
from _pytask.node_protocols import PTask
1416
from _pytask.session import Session
1517

1618

@@ -62,3 +64,20 @@ def update_states_in_database(session: Session, task_signature: str) -> None:
6264
node = session.dag.nodes[name].get("task") or session.dag.nodes[name]["node"]
6365
hash_ = node.state()
6466
_create_or_update_state(task_signature, node.signature, hash_)
67+
68+
69+
def has_node_changed(task: PTask, node: MetaNode) -> bool:
70+
"""Indicate whether a single dependency or product has changed."""
71+
# If node does not exist, we receive None.
72+
node_state = node.state()
73+
if node_state is None:
74+
return True
75+
76+
with DatabaseSession() as session:
77+
db_state = session.get(State, (task.signature, node.signature))
78+
79+
# If the node is not in the database.
80+
if db_state is None:
81+
return True
82+
83+
return node_state != db_state.hash_

src/_pytask/execute.py

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
from _pytask.console import format_strings_as_flat_tree
1717
from _pytask.console import unify_styles
1818
from _pytask.dag_utils import descending_tasks
19+
from _pytask.dag_utils import node_and_neighbors
1920
from _pytask.dag_utils import TopologicalSorter
21+
from _pytask.database_utils import has_node_changed
2022
from _pytask.database_utils import update_states_in_database
2123
from _pytask.exceptions import ExecutionError
2224
from _pytask.exceptions import NodeLoadError
@@ -28,6 +30,7 @@
2830
from _pytask.node_protocols import PTask
2931
from _pytask.outcomes import count_outcomes
3032
from _pytask.outcomes import Exit
33+
from _pytask.outcomes import SkippedUnchanged
3134
from _pytask.outcomes import TaskOutcome
3235
from _pytask.outcomes import WouldBeExecuted
3336
from _pytask.reports import ExecutionReport
@@ -124,28 +127,42 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:
124127
2. Create the directory where the product will be placed.
125128
126129
"""
127-
for dependency in session.dag.predecessors(task.signature):
128-
node = session.dag.nodes[dependency]["node"]
129-
if not node.state():
130-
msg = f"{task.name!r} requires missing node {node.name!r}."
131-
if IS_FILE_SYSTEM_CASE_SENSITIVE:
132-
msg += (
133-
"\n\n(Hint: Your file-system is case-sensitive. Check the paths' "
134-
"capitalization carefully.)"
135-
)
136-
raise NodeNotFoundError(msg)
130+
if has_mark(task, "would_be_executed"):
131+
raise WouldBeExecuted
132+
133+
dag = session.dag
134+
135+
needs_to_be_executed = session.config["force"]
136+
if not needs_to_be_executed:
137+
predecessors = set(dag.predecessors(task.signature)) | {task.signature}
138+
for node_signature in node_and_neighbors(dag, task.signature):
139+
node = dag.nodes[node_signature].get("task") or dag.nodes[
140+
node_signature
141+
].get("node")
142+
if node_signature in predecessors and not node.state():
143+
msg = f"{task.name!r} requires missing node {node.name!r}."
144+
if IS_FILE_SYSTEM_CASE_SENSITIVE:
145+
msg += (
146+
"\n\n(Hint: Your file-system is case-sensitive. Check the "
147+
"paths' capitalization carefully.)"
148+
)
149+
raise NodeNotFoundError(msg)
150+
151+
has_changed = has_node_changed(task=task, node=node)
152+
if has_changed:
153+
needs_to_be_executed = True
154+
break
155+
156+
if not needs_to_be_executed:
157+
raise SkippedUnchanged
137158

138159
# Create directory for product if it does not exist. Maybe this should be a `setup`
139160
# method for the node classes.
140-
for product in session.dag.successors(task.signature):
141-
node = session.dag.nodes[product]["node"]
161+
for product in dag.successors(task.signature):
162+
node = dag.nodes[product]["node"]
142163
if isinstance(node, PPathNode):
143164
node.path.parent.mkdir(parents=True, exist_ok=True)
144165

145-
would_be_executed = has_mark(task, "would_be_executed")
146-
if would_be_executed:
147-
raise WouldBeExecuted
148-
149166

150167
def _safe_load(node: PNode, task: PTask, is_product: bool) -> Any:
151168
try:

src/_pytask/hookspecs.py

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414

1515
if TYPE_CHECKING:
16-
from _pytask.node_protocols import MetaNode
1716
from _pytask.models import NodeInfo
1817
from _pytask.node_protocols import PNode
1918
import click
@@ -245,28 +244,6 @@ def pytask_dag_modify_dag(session: Session, dag: nx.DiGraph) -> None:
245244
"""
246245

247246

248-
@hookspec
249-
def pytask_dag_select_execution_dag(session: Session, dag: nx.DiGraph) -> None:
250-
"""Select the subgraph which needs to be executed.
251-
252-
This hook determines which of the tasks have to be re-run because something has
253-
changed.
254-
255-
"""
256-
257-
258-
@hookspec(firstresult=True)
259-
def pytask_dag_has_node_changed(
260-
session: Session, dag: nx.DiGraph, task: PTask, node: MetaNode
261-
) -> None:
262-
"""Select the subgraph which needs to be executed.
263-
264-
This hook determines which of the tasks have to be re-run because something has
265-
changed.
266-
267-
"""
268-
269-
270247
@hookspec
271248
def pytask_dag_log(session: Session, report: DagReport) -> None:
272249
"""Log errors during resolving dependencies."""

src/_pytask/mark/structures.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from typing import Iterable
77
from typing import Mapping
88

9+
from _pytask.mark_utils import get_all_marks
910
from _pytask.models import CollectionMetadata
1011
from _pytask.typing import is_task_function
1112
from attrs import define
@@ -122,7 +123,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> MarkDecorator:
122123

123124
def get_unpacked_marks(obj: Callable[..., Any]) -> list[Mark]:
124125
"""Obtain the unpacked marks that are stored on an object."""
125-
mark_list = obj.pytask_meta.markers if hasattr(obj, "pytask_meta") else []
126+
mark_list = get_all_marks(obj)
126127
return normalize_mark_list(mark_list)
127128

128129

src/_pytask/persist.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from _pytask.config import hookimpl
88
from _pytask.dag_utils import node_and_neighbors
9+
from _pytask.database_utils import has_node_changed
910
from _pytask.database_utils import update_states_in_database
1011
from _pytask.mark_utils import has_mark
1112
from _pytask.outcomes import Persisted
@@ -46,7 +47,16 @@ def pytask_execute_task_setup(session: Session, task: PTask) -> None:
4647
)
4748

4849
if all_nodes_exist:
49-
raise Persisted
50+
any_node_changed = any(
51+
has_node_changed(
52+
task=task,
53+
node=session.dag.nodes[name].get("task")
54+
or session.dag.nodes[name]["node"],
55+
)
56+
for name in node_and_neighbors(session.dag, task.signature)
57+
)
58+
if any_node_changed:
59+
raise Persisted
5060

5161

5262
@hookimpl

tests/test_collect.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,3 +661,21 @@ def task_example() -> Annotated[int, 1]: ...
661661
result = runner.invoke(cli, [tmp_path.as_posix()])
662662
assert result.exit_code == ExitCode.COLLECTION_FAILED
663663
assert "The return annotation of the task" in result.output
664+
665+
666+
@pytest.mark.end_to_end()
667+
def test_scheduling_w_mixed_priorities(runner, tmp_path):
668+
source = """
669+
import pytask
670+
671+
@pytask.mark.try_last
672+
@pytask.mark.try_first
673+
def task_mixed(): pass
674+
"""
675+
tmp_path.joinpath("task_module.py").write_text(textwrap.dedent(source))
676+
677+
result = runner.invoke(cli, [tmp_path.as_posix()])
678+
679+
assert result.exit_code == ExitCode.COLLECTION_FAILED
680+
assert "Could not collect" in result.output
681+
assert "The task cannot have" in result.output

0 commit comments

Comments
 (0)