Skip to content

Take a common ancestor if closest not available to shorten paths. #76

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ all releases are available on `PyPI <https://pypi.org/project/pytask>`_ and
`Anaconda.org <https://anaconda.org/conda-forge/pytask>`_.


0.0.14 - 2021-xx-xx
-------------------

- :gh:`76` fixes :gh:`75` which reports a bug when a closest ancestor cannot be found to
shorten node names in the CLI output. Instead a common ancestor is used.


0.0.13 - 2021-03-09
-------------------

Expand Down
77 changes: 12 additions & 65 deletions src/_pytask/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from _pytask.exceptions import NodeNotCollectedError
from _pytask.exceptions import NodeNotFoundError
from _pytask.mark import get_marks_from_obj
from _pytask.path import find_closest_ancestor
from _pytask.path import find_common_ancestor
from _pytask.path import relative_to
from _pytask.shared import find_duplicates


Expand Down Expand Up @@ -337,66 +340,6 @@ def _create_task_name(path: Path, base_name: str):
return path.as_posix() + "::" + base_name


def _relative_to(path: Path, source: Path, include_source: bool = True):
"""Make a path relative to another path.

In contrast to :meth:`pathlib.Path.relative_to`, this function allows to keep the
name of the source path.

Examples
--------
>>> from pathlib import Path
>>> _relative_to(Path("folder", "file.py"), Path("folder")).as_posix()
'folder/file.py'
>>> _relative_to(Path("folder", "file.py"), Path("folder"), False).as_posix()
'file.py'

"""
return Path(source.name if include_source else "", path.relative_to(source))


def _find_closest_ancestor(path: Path, potential_ancestors: List[Path]):
"""Find the closest ancestor of a path.

In case a path is the path to the task file itself, we return the path.

.. note::

The check :meth:`pathlib.Path.is_file` only succeeds when the file exists. This
must be true as otherwise an error is raised by :obj:`click` while using the
cli.

Examples
--------
>>> from pathlib import Path
>>> _find_closest_ancestor(Path("folder", "file.py"), [Path("folder")]).as_posix()
'folder'

>>> paths = [Path("folder"), Path("folder", "subfolder")]
>>> _find_closest_ancestor(Path("folder", "subfolder", "file.py"), paths).as_posix()
'folder/subfolder'

"""
closest_ancestor = None
for ancestor in potential_ancestors:
if ancestor == path:
closest_ancestor = path
break

# Paths can also point to files in which case we want to take the parent folder.
if ancestor.is_file():
ancestor = ancestor.parent

if ancestor in path.parents:
if closest_ancestor is None or (
len(path.relative_to(ancestor).parts)
< len(path.relative_to(closest_ancestor).parts)
):
closest_ancestor = ancestor

return closest_ancestor


def reduce_node_name(node, paths: List[Path]):
"""Reduce the node name.

Expand All @@ -407,17 +350,21 @@ def reduce_node_name(node, paths: List[Path]):
path from one path in ``session.config["paths"]`` to the node.

"""
ancestor = _find_closest_ancestor(node.path, paths)
ancestor = find_closest_ancestor(node.path, paths)
if ancestor is None:
raise ValueError("A node must be defined in a child of 'paths'.")
elif isinstance(node, MetaTask):
try:
ancestor = find_common_ancestor(node.path, *paths)
except ValueError:
ancestor = node.path.parents[-1]

if isinstance(node, MetaTask):
if ancestor == node.path:
name = _create_task_name(Path(node.path.name), node.base_name)
else:
shortened_path = _relative_to(node.path, ancestor)
shortened_path = relative_to(node.path, ancestor)
name = _create_task_name(shortened_path, node.base_name)
elif isinstance(node, MetaNode):
name = _relative_to(node.path, ancestor).as_posix()
name = relative_to(node.path, ancestor).as_posix()
else:
raise ValueError(f"Unknown node {node} with type '{type(node)}'.")

Expand Down
96 changes: 96 additions & 0 deletions src/_pytask/path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""This module contains code to handle paths."""
from pathlib import Path
from pathlib import PurePath
from typing import List
from typing import Union


def relative_to(
path: Union[str, Path], source: Union[str, Path], include_source: bool = True
) -> Union[str, Path]:
"""Make a path relative to another path.

In contrast to :meth:`pathlib.Path.relative_to`, this function allows to keep the
name of the source path.

Examples
--------
The default behavior of :mod:`pathlib` is to exclude the source path from the
relative path.

>>> relative_to("folder/file.py", "folder", False).as_posix()
'file.py'

To provide relative locations to users, it is sometimes more helpful to provide the
source as an orientation.

>>> relative_to("folder/file.py", "folder").as_posix()
'folder/file.py'

"""
source_name = Path(source).name if include_source else ""
return Path(source_name, Path(path).relative_to(source))


def find_closest_ancestor(
path: Union[str, Path], potential_ancestors: List[Union[str, Path]]
) -> Path:
"""Find the closest ancestor of a path.

In case a path is the path to the task file itself, we return the path.

.. note::

The check :meth:`pathlib.Path.is_file` only succeeds when the file exists. This
must be true as otherwise an error is raised by :obj:`click` while using the
cli.

Examples
--------
>>> from pathlib import Path
>>> find_closest_ancestor(Path("folder", "file.py"), [Path("folder")]).as_posix()
'folder'

>>> paths = [Path("folder"), Path("folder", "subfolder")]
>>> find_closest_ancestor(Path("folder", "subfolder", "file.py"), paths).as_posix()
'folder/subfolder'

"""
closest_ancestor = None
for ancestor in potential_ancestors:
if ancestor == path:
closest_ancestor = path
break

# Paths can also point to files in which case we want to take the parent folder.
if ancestor.is_file():
ancestor = ancestor.parent

if ancestor in path.parents:
if closest_ancestor is None or (
len(path.relative_to(ancestor).parts)
< len(path.relative_to(closest_ancestor).parts)
):
closest_ancestor = ancestor

return closest_ancestor


def find_common_ancestor(*paths: Union[str, Path]) -> Path:
"""Find a common ancestor of many paths."""
paths = [path if isinstance(path, PurePath) else Path(path) for path in paths]

for path in paths:
if not path.is_absolute():
raise ValueError(
f"Cannot find common ancestor for relative paths. {path} is relative."
)

common_parents = set.intersection(*[set(path.parents) for path in paths])

if len(common_parents) == 0:
raise ValueError("Paths have no common ancestor.")
else:
longest_parent = sorted(common_parents, key=lambda x: len(x.parts))[-1]

return longest_parent
2 changes: 1 addition & 1 deletion tests/test_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def task_unicode():


@pytest.mark.end_to_end
@pytest.mark.xfail(reason="pytask cannot capture during collection.")
@pytest.mark.xfail(strict=True, reason="pytask cannot capture during collection.")
def test_collect_capturing(tmp_path, runner):
source = """
import sys
Expand Down
52 changes: 12 additions & 40 deletions tests/test_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from _pytask.nodes import _convert_objects_to_node_dictionary
from _pytask.nodes import _create_task_name
from _pytask.nodes import _extract_nodes_from_function_markers
from _pytask.nodes import _find_closest_ancestor
from _pytask.nodes import _relative_to
from _pytask.nodes import depends_on
from _pytask.nodes import FilePathNode
from _pytask.nodes import MetaNode
Expand Down Expand Up @@ -203,37 +201,6 @@ def test_create_task_name(path, name, expected):
assert result == expected


@pytest.mark.unit
@pytest.mark.parametrize(
"path, source, include_source, expected",
[
(Path("src/hello.py"), Path("src"), True, Path("src/hello.py")),
(Path("src/hello.py"), Path("src"), False, Path("hello.py")),
],
)
def test_relative_to(path, source, include_source, expected):
result = _relative_to(path, source, include_source)
assert result == expected


@pytest.mark.unit
@pytest.mark.parametrize(
"path, potential_ancestors, expected",
[
(Path("src/task.py"), [Path("src"), Path("bld")], Path("src")),
(Path("tasks/task.py"), [Path("src"), Path("bld")], None),
(Path("src/ts/task.py"), [Path("src"), Path("src/ts")], Path("src/ts")),
(Path("src/in.txt"), [Path("src/task_d.py")], Path("src")),
(Path("src/task.py"), [Path("src/task.py")], Path("src/task.py")),
],
)
def test_find_closest_ancestor(monkeypatch, path, potential_ancestors, expected):
# Ensures that files are detected by an existing suffix not if they also exist.
monkeypatch.setattr("_pytask.nodes.pathlib.Path.is_file", lambda x: bool(x.suffix))
result = _find_closest_ancestor(path, potential_ancestors)
assert result == expected


@attr.s
class DummyTask(MetaTask):
path = attr.ib()
Expand All @@ -259,19 +226,21 @@ class FalseNode:
@pytest.mark.parametrize(
"node, paths, expectation, expected",
[
(
pytest.param(
FilePathNode.from_path(_ROOT.joinpath("src/module.py")),
[_ROOT.joinpath("alternative_src")],
pytest.raises(ValueError, match="A node must be"),
None,
does_not_raise(),
"pytask/src/module.py",
id="Common path found for FilePathNode not in 'paths' and 'paths'",
),
(
pytest.param(
FalseNode(_ROOT.joinpath("src/module.py")),
[_ROOT.joinpath("src")],
pytest.raises(ValueError, match="Unknown node"),
None,
id="throw error on unknown node type.",
),
(
pytest.param(
DummyTask(
_ROOT.joinpath("top/src/module.py"),
_ROOT.joinpath("top/src/module.py").as_posix() + "::task_func",
Expand All @@ -280,14 +249,16 @@ class FalseNode:
[_ROOT.joinpath("top/src")],
does_not_raise(),
"src/module.py::task_func",
id="make task name relative to 'paths'.",
),
(
pytest.param(
FilePathNode.from_path(_ROOT.joinpath("top/src/module.py")),
[_ROOT.joinpath("top/src")],
does_not_raise(),
"src/module.py",
id="make filepathnode relative to 'paths'.",
),
(
pytest.param(
PythonFunctionTask(
"task_d",
_ROOT.joinpath("top/src/module.py").as_posix() + "::task_d",
Expand All @@ -297,6 +268,7 @@ class FalseNode:
[_ROOT.joinpath("top/src/module.py")],
does_not_raise(),
"module.py::task_d",
id="shorten task name when task module is passed to 'paths'.",
),
],
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parametrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def func(i):


@pytest.mark.integration
@pytest.mark.xfail(reason="Cartesian task product is disabled.")
@pytest.mark.xfail(strict=True, reason="Cartesian task product is disabled.")
def test_pytask_generate_tasks_1(session):
@pytask.mark.parametrize("j", range(2))
@pytask.mark.parametrize("i", range(2))
Expand All @@ -62,7 +62,7 @@ def func(i, j):


@pytest.mark.integration
@pytest.mark.xfail(reason="Cartesian task product is disabled.")
@pytest.mark.xfail(strict=True, reason="Cartesian task product is disabled.")
def test_pytask_generate_tasks_2(session):
@pytask.mark.parametrize("j, k", itertools.product(range(2), range(2)))
@pytask.mark.parametrize("i", range(2))
Expand Down Expand Up @@ -264,7 +264,7 @@ def task_func(i):


@pytest.mark.end_to_end
@pytest.mark.xfail(reason="Cartesian task product is disabled.")
@pytest.mark.xfail(strict=True, reason="Cartesian task product is disabled.")
def test_two_parametrize_w_ids(tmp_path):
tmp_path.joinpath("task_dummy.py").write_text(
textwrap.dedent(
Expand Down
Loading