From 1d966b40dca71bf1e97212e241769e80df487687 Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Wed, 24 Apr 2024 23:19:18 +0200
Subject: [PATCH 1/9] Improve documentation on data catalogs.

---
 .gitignore                                    |  1 +
 docs/source/conf.py                           |  3 +-
 docs/source/how_to_guides/bp_scaling_tasks.md |  3 -
 docs/source/how_to_guides/the_data_catalog.md | 17 ++--
 docs/source/tutorials/using_a_data_catalog.md | 99 +++++++++++++------
 docs_src/tutorials/using_a_data_catalog_4.py  |  1 -
 pyproject.toml                                |  6 +-
 src/_pytask/data_catalog.py                   |  6 +-
 8 files changed, 88 insertions(+), 48 deletions(-)

diff --git a/.gitignore b/.gitignore
index 01b8ee7e..d23644a4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,4 @@ tests/test_jupyter/*.txt
 .pytest_cache
 .ruff_cache
 .venv
+docs/jupyter_execute
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 84b0d6be..22848175 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -51,8 +51,7 @@
     "sphinx_copybutton",
     "sphinx_click",
     "sphinx_toolbox.more_autodoc.autoprotocol",
-    "nbsphinx",
-    "myst_parser",
+    "myst_nb",
     "sphinx_design",
 ]
 
diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_scaling_tasks.md
index 4342d6ef..fa7cb5e9 100644
--- a/docs/source/how_to_guides/bp_scaling_tasks.md
+++ b/docs/source/how_to_guides/bp_scaling_tasks.md
@@ -39,9 +39,6 @@ my_project
 │           ├────config.py
 │           └────task_estimate_models.py
 │
-│
-├───setup.py
-│
 ├───.pytask
 │   └────...
 │
diff --git a/docs/source/how_to_guides/the_data_catalog.md b/docs/source/how_to_guides/the_data_catalog.md
index 6d8b825b..f7f592ca 100644
--- a/docs/source/how_to_guides/the_data_catalog.md
+++ b/docs/source/how_to_guides/the_data_catalog.md
@@ -1,9 +1,8 @@
 # The `DataCatalog` - Revisited
 
-An introduction to the data catalog can be found in the
-[tutorial](../tutorials/using_a_data_catalog.md).
-
-This guide explains some details that were left out of the tutorial.
+This guide explains more details about the {class}`~pytask.DataCatalog` that were left
+out of the [tutorial](../tutorials/using_a_data_catalog.md). Please read the tutorial
+first for a basic understanding.
 
 ## Changing the default node
 
@@ -20,18 +19,18 @@ from pytask import PythonNode
 data_catalog = DataCatalog(default_node=PythonNode)
 ```
 
-Or, learn to write your own node by reading {doc}`writing_custom_nodes`.
+Or, learn to write your node by reading {doc}`writing_custom_nodes`.
 
-Here, is an example for a `PickleNode` that uses cloudpickle instead of the normal
-`pickle` module.
+Here is an example of a {class}`~pytask.PickleNode` that uses cloudpickle instead of
+the normal {mod}`pickle` module.
 
 ```{literalinclude} ../../../docs_src/how_to_guides/the_data_catalog.py
 ```
 
 ## Changing the name and the default path
 
-By default, the data catalogs store their data in a directory `.pytask/data_catalogs`.
-If you use a `pyproject.toml` with a `[tool.pytask.ini_options]` section, then the
+By default, data catalogs store their data in a directory `.pytask/data_catalogs`. If
+you use a `pyproject.toml` with a `[tool.pytask.ini_options]` section, then the
 `.pytask` folder is in the same folder as the configuration file.
 
 The default name for a catalog is `"default"` and so you will find its data in
diff --git a/docs/source/tutorials/using_a_data_catalog.md b/docs/source/tutorials/using_a_data_catalog.md
index d704421d..d24d64d0 100644
--- a/docs/source/tutorials/using_a_data_catalog.md
+++ b/docs/source/tutorials/using_a_data_catalog.md
@@ -10,14 +10,14 @@ Two things will quickly become a nuisance in bigger projects.
    they are just intermediate representations.
 
 As a solution, pytask offers a {class}`~pytask.DataCatalog` which is a purely optional
-feature. The tutorial focuses on the main features. To learn about all features, read
-the [how-to guide](../how_to_guides/the_data_catalog.md).
+feature. The tutorial focuses on the main features. To learn about all the features,
+read the [how-to guide](../how_to_guides/the_data_catalog.md).
 
 Let us focus on the previous example and see how the {class}`~pytask.DataCatalog` helps
 us.
 
-The project structure is the same as in the previous example with the exception of the
-`.pytask` folder and the missing `data.pkl` in `bld`.
+The project structure is the same as in the previous example, except for the `.pytask`
+folder and the missing `data.pkl` in `bld`.
 
 ```text
 my_project
@@ -44,15 +44,51 @@ At first, we define the data catalog in `config.py`.
 ```{literalinclude} ../../../docs_src/tutorials/using_a_data_catalog_1.py
 ```
 
-## `task_data_preparation`
+## `task_create_random_data`
 
-Next, we will use the data catalog to save the product of the task in
-`task_data_preparation.py`.
+Next, we look at the module `task_data_preparation.py` and its task
+`task_create_random_data`. The task creates a dataframe with simulated data that should
+be stored on the disk.
 
-Instead of using a path, we set the location of the product in the data catalog with
-`data_catalog["data"]`. If the key does not exist, the data catalog will automatically
-create a {class}`~pytask.PickleNode` that allows you to save any Python object to a
-`pickle` file. The `pickle` file is stored within the `.pytask` folder.
+In the previous tutorial, we learned to use {class}`~pathlib.Path`s to define products
+of our tasks. Here we see again the signature of the task function.
+
+`````{tab-set}
+
+````{tab-item} Python 3.10+
+:sync: python310plus
+
+```{literalinclude} ../../../docs_src/tutorials/defining_dependencies_products_products_py310.py
+:lines: 10-12
+```
+````
+
+````{tab-item} Python 3.8+
+:sync: python38plus
+
+```{literalinclude} ../../../docs_src/tutorials/defining_dependencies_products_products_py38.py
+:lines: 10-12
+```
+````
+
+````{tab-item} produces
+:sync: produces
+
+```{literalinclude} ../../../docs_src/tutorials/defining_dependencies_products_products_produces.py
+:lines: 8
+```
+````
+`````
+
+When we want to use the data catalog, we replace `BLD / "data.pkl"` with an entry of the
+data catalog like `data_catalog["data"]`. If there is no entry with the name `"data"`
+yet, the data catalog will automatically create a {class}`~pytask.PickleNode`. The
+node allows you to save any Python object to a `pickle` file.
+
+You probably noticed that we did not need to define a path. That is because the data
+catalog takes care of that and stores the `pickle` file in the `.pytask` folder.
+
+Using `data_catalog["data"]` is thus equivalent to using `PickleNode(path=Path(...))`.
 
 The following tabs show you how to use the data catalog given the interface you prefer.
 
@@ -125,10 +161,6 @@ Following one of the interfaces gives you immediate access to the
 ````{tab-item} Python 3.10+
 :sync: python310plus
 
-Use `data_catalog["data"]` as an default argument to access the
-{class}`~pytask.PickleNode` within the task. When you are done transforming your
-{class}`~pandas.DataFrame`, save it with {meth}`~pytask.PickleNode.save`.
-
 ```{literalinclude} ../../../docs_src/tutorials/using_a_data_catalog_3_py310.py
 :emphasize-lines: 12
 ```
@@ -138,10 +170,6 @@ Use `data_catalog["data"]` as an default argument to access the
 ````{tab-item} Python 3.8+
 :sync: python38plus
 
-Use `data_catalog["data"]` as an default argument to access the
-{class}`~pytask.PickleNode` within the task. When you are done transforming your
-{class}`~pandas.DataFrame`, save it with {meth}`~pytask.PickleNode.save`.
-
 ```{literalinclude} ../../../docs_src/tutorials/using_a_data_catalog_3_py38.py
 :emphasize-lines: 12
 ```
@@ -160,7 +188,8 @@ In most projects, you have other data sets that you would like to access via the
 catalog. To add them, call the {meth}`~pytask.DataCatalog.add` method and supply a name
 and a path.
 
-Let's add `file.csv` to the data catalog.
+Let's add `file.csv` with the name `"csv"` to the data catalog and use it to create
+`data_catalog["transformed_csv"]`.
 
 ```text
 my_project
@@ -174,8 +203,6 @@ my_project
 │       ├────task_data_preparation.py
 │       └────task_plot_data.py
 │
-├───setup.py
-│
 ├───.pytask
 │   └────...
 │
@@ -184,13 +211,24 @@ my_project
     └────plot.png
 ```
 
-The path can be absolute or relative to the module of the data catalog.
+We can use a relative or an absolute path to define the location of the file. A relative
+path means the location is relative to the module of the data catalog.
 
 ```{literalinclude} ../../../docs_src/tutorials/using_a_data_catalog_4.py
 ```
 
-You can now use the data catalog as in previous example and use the
-{class}`~~pathlib.Path` in the task.
+You can now use the data catalog as in the previous example and use the
+{class}`~pathlib.Path` in the task.
+
+```{note}
+Note that the value of `data_catalog["csv"]` inside the task becomes a
+{class}`~pathlib.Path`. This is because a {class}`~pathlib.Path` in
+{meth}`~pytask.DataCatalog.add` is not parsed to a {class}`~pytask.PickleNode` but to a
+{class}`~pytask.PathNode`.
+
+Read {doc}`../how_to_guides/writing_custom_nodes` for more information about the
+different node types, which are not relevant for now.
+```
 
 `````{tab-set}
 
@@ -224,9 +262,14 @@ You can now use the data catalog as in previous example and use the
 
 ## Developing with the `DataCatalog`
 
-You can also use the data catalog in a Jupyter notebook or in the terminal in the Python
-interpreter. Simply import the data catalog, select a node and call the
-{meth}`~pytask.PNode.load` method of a node to access its value.
+You can also use the data catalog in a Jupyter Notebook or in the Python interpreter in
+the terminal. This can be super helpful when you develop tasks interactively in a
+Jupyter Notebook.
+
+Simply import the data catalog, select a node and call the {meth}`~pytask.PNode.load`
+method of a node to access its value.
+
+Here is an example with a terminal.
 
 ```pycon
 >>> from myproject.config import data_catalog
diff --git a/docs_src/tutorials/using_a_data_catalog_4.py b/docs_src/tutorials/using_a_data_catalog_4.py
index 125bbdf5..fd49e88d 100644
--- a/docs_src/tutorials/using_a_data_catalog_4.py
+++ b/docs_src/tutorials/using_a_data_catalog_4.py
@@ -10,4 +10,3 @@
 
 # Use either a relative or a absolute path.
 data_catalog.add("csv", Path("file.csv"))
-data_catalog.add("transformed_csv", BLD / "file.pkl")
diff --git a/pyproject.toml b/pyproject.toml
index f864f715..8a11a25c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,7 +52,7 @@ docs = [
     "ipython",
     "matplotlib",
     "myst-parser",
-    "nbsphinx",
+    "myst-nb",
     "sphinx",
     "sphinx-click",
     "sphinx-copybutton",
@@ -92,6 +92,10 @@ build-backend = "hatchling.build"
 managed = true
 dev-dependencies = ["tox-uv>=1.7.0"]
 
+[tool.rye.scripts]
+clean-docs = { cmd = "rm -rf docs/build" }
+build-docs = { cmd = "sphinx-build -b html docs/source docs/build" }
+
 [tool.hatch.build.hooks.vcs]
 version-file = "src/_pytask/_version.py"
 
diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py
index 615fc021..e97f0e55 100644
--- a/src/_pytask/data_catalog.py
+++ b/src/_pytask/data_catalog.py
@@ -91,10 +91,8 @@ def __getitem__(self, name: str) -> PNode | PProvisionalNode:
             self.add(name)
         return self.entries[name]
 
-    def add(self, name: str, node: PNode | PProvisionalNode | None = None) -> None:
+    def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None:
         """Add an entry to the data catalog."""
-        assert isinstance(self.path, Path)
-
         if not isinstance(name, str):
             msg = "The name of a catalog entry must be a string."
             raise TypeError(msg)
@@ -107,7 +105,7 @@ def add(self, name: str, node: PNode | PProvisionalNode | None = None) -> None:
                 )
             else:
                 self.entries[name] = self.default_node(name=name)  # type: ignore[call-arg]
-            self.path.joinpath(f"{filename}-node.pkl").write_bytes(
+            self.path.joinpath(f"{filename}-node.pkl").write_bytes(  # type: ignore[union-attr]
                 pickle.dumps(self.entries[name])
             )
         elif isinstance(node, (PNode, PProvisionalNode)):

From 883a6e8da4bbf72da6db4ff0639fff91a9a8371f Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Wed, 24 Apr 2024 23:20:02 +0200
Subject: [PATCH 2/9] fix.

---
 docs/source/changes.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/changes.md b/docs/source/changes.md
index 216901a7..c2a255fa 100644
--- a/docs/source/changes.md
+++ b/docs/source/changes.md
@@ -47,6 +47,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
 - {pull}`603` fixes an example in the documentation about capturing warnings.
 - {pull}`604` fixes some examples with `PythonNode`s in the documentation.
 - {pull}`605` improves checks and CI.
+- {pull}`606` improves the documentation for data catalogs.
 
 ## 0.4.7 - 2024-03-19
 

From 7f7dd064dc1b13aa64628fef55987cd9c1e0407a Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sat, 4 May 2024 00:33:56 +0200
Subject: [PATCH 3/9] Fix.

---
 docs/source/reference_guides/api.md | 2 ++
 tox.ini                             | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/reference_guides/api.md b/docs/source/reference_guides/api.md
index 4ec74a8e..d06d90b5 100644
--- a/docs/source/reference_guides/api.md
+++ b/docs/source/reference_guides/api.md
@@ -228,7 +228,9 @@ Task are currently represented by the following classes:
 
 ```{eval-rst}
 .. autoclass:: pytask.Task
+   :members:
 .. autoclass:: pytask.TaskWithoutPath
+   :members:
 ```
 
 Currently, there are no different types of tasks since changing the `.function`
diff --git a/tox.ini b/tox.ini
index 4e82bf7f..f6f1319f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,7 +9,7 @@ package = editable
 [testenv:docs]
 extras = docs, test
 commands =
-    - sphinx-build -n -T -b html -d {envtmpdir}/doctrees docs/source docs/build/html
+    sphinx-build -n -T -b html -d {envtmpdir}/doctrees docs/source docs/build/html
     - sphinx-build -n -T -b doctest -d {envtmpdir}/doctrees docs/source docs/build/html
 
 [testenv:typing]

From 2177d0ea626575eec30cba44c10fb1ea38dd958a Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sat, 4 May 2024 00:39:12 +0200
Subject: [PATCH 4/9] Make entries private.

---
 src/_pytask/data_catalog.py | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py
index e97f0e55..c06913c9 100644
--- a/src/_pytask/data_catalog.py
+++ b/src/_pytask/data_catalog.py
@@ -47,9 +47,6 @@ class DataCatalog:
         A default node for loading and saving values. By default,
         :class:`~pytask.PickleNode` is used to serialize any Python object with the
         :mod:`pickle` module.
-    entries
-        A collection of entries in the catalog. Entries can be :class:`~pytask.PNode` or
-        a :class:`DataCatalog` itself for nesting catalogs.
     name
         The name of the data catalog. Use it when you are working with multiple data
         catalogs that store data under the same keys.
@@ -60,13 +57,13 @@ class DataCatalog:
     """
 
     default_node: type[PNode] = PickleNode
-    entries: dict[str, PNode | PProvisionalNode] = field(factory=dict)
     name: str = "default"
     path: Path | None = None
+    _entries: dict[str, PNode | PProvisionalNode] = field(factory=dict)
+    _instance_path: Path = field(factory=_get_parent_path_of_data_catalog_module)
     _session_config: dict[str, Any] = field(
         factory=lambda *x: {"check_casing_of_paths": True}  # noqa: ARG005
     )
-    _instance_path: Path = field(factory=_get_parent_path_of_data_catalog_module)
 
     def __attrs_post_init__(self) -> None:
         root_path, _ = find_project_root_and_config((self._instance_path,))
@@ -77,19 +74,16 @@ def __attrs_post_init__(self) -> None:
 
         self.path.mkdir(parents=True, exist_ok=True)
 
-        self._initialize()
-
-    def _initialize(self) -> None:
-        """Initialize the data catalog with persisted nodes from previous runs."""
+        # Initialize the data catalog with persisted nodes from previous runs.
         for path in self.path.glob("*-node.pkl"):  # type: ignore[union-attr]
             node = pickle.loads(path.read_bytes())  # noqa: S301
-            self.entries[node.name] = node
+            self._entries[node.name] = node
 
     def __getitem__(self, name: str) -> PNode | PProvisionalNode:
         """Allow to access entries with the squared brackets syntax."""
-        if name not in self.entries:
+        if name not in self._entries:
             self.add(name)
-        return self.entries[name]
+        return self._entries[name]
 
     def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None:
         """Add an entry to the data catalog."""
@@ -100,16 +94,16 @@ def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None:
         if node is None:
             filename = hashlib.sha256(name.encode()).hexdigest()
             if isinstance(self.default_node, PPathNode):
-                self.entries[name] = self.default_node(
+                self._entries[name] = self.default_node(
                     name=name, path=self.path / f"{filename}.pkl"
                 )
             else:
-                self.entries[name] = self.default_node(name=name)  # type: ignore[call-arg]
+                self._entries[name] = self.default_node(name=name)  # type: ignore[call-arg]
             self.path.joinpath(f"{filename}-node.pkl").write_bytes(  # type: ignore[union-attr]
-                pickle.dumps(self.entries[name])
+                pickle.dumps(self._entries[name])
             )
         elif isinstance(node, (PNode, PProvisionalNode)):
-            self.entries[name] = node
+            self._entries[name] = node
         else:
             # Acquire the latest pluginmanager.
             session = Session(config=self._session_config, hook=storage.get().hook)
@@ -123,4 +117,4 @@ def add(self, name: str, node: PNode | PProvisionalNode | Any = None) -> None:
             if collected_node is None:  # pragma: no cover
                 msg = f"{node!r} cannot be parsed."
                 raise NodeNotCollectedError(msg)
-            self.entries[name] = collected_node
+            self._entries[name] = collected_node

From 5004c35718081f5151651007f447e05a469116e4 Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sat, 4 May 2024 01:11:11 +0200
Subject: [PATCH 5/9] Fix errors in doctest.

---
 docs/source/reference_guides/api.md | 3 +++
 src/_pytask/nodes.py                | 1 +
 src/_pytask/outcomes.py             | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/source/reference_guides/api.md b/docs/source/reference_guides/api.md
index d06d90b5..56204c28 100644
--- a/docs/source/reference_guides/api.md
+++ b/docs/source/reference_guides/api.md
@@ -327,6 +327,9 @@ resolution and execution.
 
     An indicator to mark arguments of tasks as products.
 
+    >>> from pathlib import Path
+    >>> from pytask import Product
+    >>> from typing_extensions import Annotated
     >>> def task_example(path: Annotated[Path, Product]) -> None:
     ...     path.write_text("Hello, World!")
 
diff --git a/src/_pytask/nodes.py b/src/_pytask/nodes.py
index 24b4d207..4c678d9d 100644
--- a/src/_pytask/nodes.py
+++ b/src/_pytask/nodes.py
@@ -226,6 +226,7 @@ class PythonNode(PNode):
     own hashing function. For example, from the :mod:`deepdiff` library.
 
     >>> from deepdiff import DeepHash
+    >>> from pytask import PythonNode
     >>> node = PythonNode(name="node", value={"a": 1}, hash=lambda x: DeepHash(x)[x])
 
     .. warning:: Hashing big objects can require some time.
diff --git a/src/_pytask/outcomes.py b/src/_pytask/outcomes.py
index 524f0975..f39151b6 100644
--- a/src/_pytask/outcomes.py
+++ b/src/_pytask/outcomes.py
@@ -174,7 +174,7 @@ def count_outcomes(
 
     Examples
     --------
-    >>> from _pytask.outcomes import CollectionOutcome, TaskOutcome
+    >>> from _pytask.outcomes import CollectionOutcome, TaskOutcome, count_outcomes
     >>> count_outcomes([], CollectionOutcome)
     {<CollectionOutcome.SUCCESS: 1>: 0, <CollectionOutcome.FAIL: 2>: 0}
 

From d8894b8d64cfdf16361d0399616e1ce72b7649fd Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sat, 4 May 2024 01:21:57 +0200
Subject: [PATCH 6/9] fix.

---
 src/_pytask/data_catalog.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py
index c06913c9..c906d701 100644
--- a/src/_pytask/data_catalog.py
+++ b/src/_pytask/data_catalog.py
@@ -75,7 +75,7 @@ def __attrs_post_init__(self) -> None:
         self.path.mkdir(parents=True, exist_ok=True)
 
         # Initialize the data catalog with persisted nodes from previous runs.
-        for path in self.path.glob("*-node.pkl"):  # type: ignore[union-attr]
+        for path in self.path.glob("*-node.pkl"):
             node = pickle.loads(path.read_bytes())  # noqa: S301
             self._entries[node.name] = node
 

From 4480d781a012d51330490190992b0b35c26cfd7e Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sun, 12 May 2024 15:21:00 +0200
Subject: [PATCH 7/9] Document restriction to data catalog names.

---
 docs/source/how_to_guides/the_data_catalog.md | 4 ++++
 src/_pytask/data_catalog.py                   | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/docs/source/how_to_guides/the_data_catalog.md b/docs/source/how_to_guides/the_data_catalog.md
index f7f592ca..fa93f8e4 100644
--- a/docs/source/how_to_guides/the_data_catalog.md
+++ b/docs/source/how_to_guides/the_data_catalog.md
@@ -41,6 +41,10 @@ The default name for a catalog is `"default"` and so you will find its data in
 data_catalog = DataCatalog(name="data_management")
 ```
 
+```{note}
+The name of a data catalog is restricted to letters, numbers, hyphens and underscores.
+```
+
 You can also change the path where the data catalogs will be stored by changing the
 `path` attribute. Here, we store the data catalog's data next to the module where the
 data catalog is defined in `.data`.
diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py
index c906d701..ebe2f4b7 100644
--- a/src/_pytask/data_catalog.py
+++ b/src/_pytask/data_catalog.py
@@ -48,8 +48,9 @@ class DataCatalog:
         :class:`~pytask.PickleNode` is used to serialize any Python object with the
         :mod:`pickle` module.
     name
-        The name of the data catalog. Use it when you are working with multiple data
-        catalogs that store data under the same keys.
+        The name of the data catalog which can only contain letters, numbers, hyphens
+        and underscores. Use it when you are working with multiple data catalogs to
+        store data in different locations.
     path
         A path where automatically created files are stored. By default, it will be
         ``.pytask/data_catalogs/default``.

From 1b4df364919d28213666adeaddde150b7ea248eb Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sun, 12 May 2024 15:42:57 +0200
Subject: [PATCH 8/9] Add example for nested data catalogs.

---
 docs/source/how_to_guides/the_data_catalog.md | 62 ++++++++++++++++++-
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/docs/source/how_to_guides/the_data_catalog.md b/docs/source/how_to_guides/the_data_catalog.md
index fa93f8e4..74e3db7c 100644
--- a/docs/source/how_to_guides/the_data_catalog.md
+++ b/docs/source/how_to_guides/the_data_catalog.md
@@ -14,6 +14,7 @@ For example, use the {class}`~pytask.PythonNode` as the default.
 
 ```python
 from pytask import PythonNode
+from pytask import DataCatalog
 
 
 data_catalog = DataCatalog(default_node=PythonNode)
@@ -38,6 +39,9 @@ The default name for a catalog is `"default"` and so you will find its data in
 `"data_management"`, you will find the data in `.pytask/data_catalogs/data_management`.
 
 ```python
+from pytask import DataCatalog
+
+
 data_catalog = DataCatalog(name="data_management")
 ```
 
@@ -51,6 +55,7 @@ data catalog is defined in `.data`.
 
 ```python
 from pathlib import Path
+from pytask import DataCatalog
 
 
 data_catalog = DataCatalog(path=Path(__file__).parent / ".data")
@@ -58,14 +63,15 @@ data_catalog = DataCatalog(path=Path(__file__).parent / ".data")
 
 ## Multiple data catalogs
 
-You can use multiple data catalogs when you want to separate your datasets across
-multiple catalogs or when you want to use the same names multiple times (although it is
-not recommended!).
+You can use multiple data catalogs when you want to separate your datasets or to avoid
+name collisions of data catalog entries.
 
 Make sure you assign different names to the data catalogs so that their data is stored
 in different directories.
 
 ```python
+from pytask import DataCatalog
+
 # Stored in .pytask/data_catalog/a
 data_catalog_a = DataCatalog(name="a")
 
@@ -74,3 +80,54 @@ data_catalog_b = DataCatalog(name="b")
 ```
 
 Or, use different paths as explained above.
+
+## Nested data catalogs
+
+Name collisions can also occur when you are using multiple levels of repetitions, for
+example, when you are fitting multiple models to multiple data sets.
+
+You can structure your data catalogs like this.
+
+```python
+from pytask import DataCatalog
+
+
+MODEL_NAMES = ("ols", "logistic_regression")
+DATA_NAMES = ("data_1", "data_2")
+
+
+nested_data_catalogs = {
+    model_name: {
+        data_name: DataCatalog(name=f"{model_name}-{data_name}")
+        for data_name in DATA_NAMES
+    }
+    for model_name in MODEL_NAMES
+}
+```
+
+The task could look like this.
+
+```python
+from pathlib import Path
+from typing import Any
+from pytask import task
+from typing_extensions import Annotated
+
+from my_project.config import DATA_NAMES
+from my_project.config import MODEL_NAMES
+from my_project.config import nested_data_catalogs
+
+
+for model_name in MODEL_NAMES:
+    for data_name in DATA_NAMES:
+
+        @task
+        def fit_model(
+            path: Path = Path("...", data_name)
+        ) -> Annotated[
+            Any, nested_data_catalogs[model_name][data_name]["fitted_model"]
+        ]:
+            data = ...
+            fitted_model = ...
+            return fitted_model
+```

From 13173f49c41e66fd0d75b72b4690f08702a67b4c Mon Sep 17 00:00:00 2001
From: Tobias Raabe <raabe@posteo.de>
Date: Sun, 12 May 2024 15:59:39 +0200
Subject: [PATCH 9/9] Fix.

---
 src/_pytask/data_catalog.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/_pytask/data_catalog.py b/src/_pytask/data_catalog.py
index 730558bb..8a9a08cd 100644
--- a/src/_pytask/data_catalog.py
+++ b/src/_pytask/data_catalog.py
@@ -59,7 +59,6 @@ class DataCatalog:
     """
 
     default_node: type[PNode] = PickleNode
-    entries: dict[str, PNode | PProvisionalNode] = field(factory=dict)
     name: str = field(default="default")
     path: Path | None = None
     _entries: dict[str, PNode | PProvisionalNode] = field(factory=dict)