From 3950e307bc6692f882e348492d1d12c1dc4d769e Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 9 Jun 2024 09:26:20 +0200 Subject: [PATCH 1/9] Redesign the scaling tasks guide. --- docs/source/how_to_guides/bp_scaling_tasks.md | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_scaling_tasks.md index fa7cb5e9..0c13bf75 100644 --- a/docs/source/how_to_guides/bp_scaling_tasks.md +++ b/docs/source/how_to_guides/bp_scaling_tasks.md @@ -1,14 +1,27 @@ # Scaling tasks -In any bigger project you quickly come to the point where you stack multiple repetitions -of tasks on top of each other. +In many projects, tasks are repeated across multiple dimensions that are stacked on top +of each other. -For example, you have one dataset, four different ways to prepare it, and three -statistical models to analyze the data. The cartesian product of all steps combined -comprises twelve differently fitted models. +For example, take a project that there are four ways to simulate data and there are +three different models that should be fitted on each dataset. -Here you find some tips on how to set up your tasks such that you can easily modify the -cartesian product of steps. +Assuming there is a high-level interface to simulate data, we can loop over the task for +simulating data four times with different arguments. + +Assuming there is a high-level interface to fit models to data, + +Assuming that you can easily switch the model the model fitting can be done in a taskThe +cartesian product of all steps combined comprises twelve differently fitted models. + +This guide shows an approach to organizing your tasks that can be best described as +flattening the loops. + +## The data catalog + +First of all, we need to create a data catalog in a `config.py` in your project. 
+ +The data catalog plays a key role in managing lots of repetitions of tasks because it ## Scalability @@ -18,7 +31,7 @@ different models to each specification. This is the structure of the project. -``` +```text my_project ├───pyproject.toml │ From e143a4d03dc218c1ecf81b63c79f8d2d45f9a8bf Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 9 Jun 2024 09:28:16 +0200 Subject: [PATCH 2/9] Fix. --- docs/source/changes.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/changes.md b/docs/source/changes.md index 866452ea..2a521c5a 100644 --- a/docs/source/changes.md +++ b/docs/source/changes.md @@ -5,6 +5,10 @@ chronological order. Releases follow [semantic versioning](https://semver.org/) releases are available on [PyPI](https://pypi.org/project/pytask) and [Anaconda.org](https://anaconda.org/conda-forge/pytask). +## 0.5.1 - 2024-xx-xx + +- {pull}`616` redesigns the guide on "Scaling Tasks". + ## 0.5.0 - 2024-05-26 - {pull}`548` fixes the type hints for {meth}`~pytask.Task.execute` and From d34550f89b9b51e549af5ccd8dd7919010ec89b5 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Mon, 24 Jun 2024 15:27:57 +0200 Subject: [PATCH 3/9] Fix. --- docs/source/how_to_guides/bp_scaling_tasks.md | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_scaling_tasks.md index 0c13bf75..e1a3d277 100644 --- a/docs/source/how_to_guides/bp_scaling_tasks.md +++ b/docs/source/how_to_guides/bp_scaling_tasks.md @@ -1,5 +1,98 @@ # Scaling tasks +- \[ \] Write about adding another dimension. +- \[ \] Write about adding another level. +- \[ \] Write about executing subsets of tasks. +- \[ \] Write about grouping by one dimension´or aggregating. + +In projects where task inputs and outputs are sufficiently standardized, it is possible +to make extensive use of task repetition. 
+ +A common pattern is to write multiple loops around a task function where each loop +stands for a different dimension. A dimension, for example, represents different +datasets or model specifications to analyze the datasets. + +There is nothing wrong with using nested loops for simpler projects that are clearly +defined in scope. But, often they are just the start of looking at a problem from +different angles and soon you want to add more dimensions. + +Adding another loop in a lot of places in your project is cumbersome and the increased +indentation is visually displeasing. + +It is not the most serious problem, though. More importantly, it becomes cumbersome to +reference dependencies of products and to set unique identifiers for tasks. The latter +is important to execute only subsets of the project. + +How do we solve these problems? Here is a brief explanation of the solution. + +1. Create objects to define every dimension in the project. A dimension can be + characterized by a single value like a {class}`~pathlib.Path`, an + {class}`~enum.Enum`, or a {class}`~typing.NamedTuple` or + {func}`~dataclasses.dataclass` if more fields are needed. + +1. Create an object like a {class}`~typing.NamedTuple` or a + {func}`~dataclasses.dataclass` that has one attribute for each dimension. For lack of + a better name, we will call this unit an experiment. + + The experiment combines the information provided by each dimension to create a unique + identifier for each experiment and the names or paths of dependencies and products + for each task. + +To make the idea more tangible, let us focus on an example. + +## Example + +Let us assume we have a project with multiple datasets and model specifications that +should be fitted to the data. + +The datasets are created by the task from the +{doc}`tutorials <../tutorials/defining_dependencies_products>` parametrized with +different coefficients. 
+ +Below that is the task that fits different models to the datasets using a double loop. + +```python +from pathlib import Path +from pytask import task, Product + + +SRC = Path(__file__).parent +BLD = SRC / "bld" + + + + + +for dat + + + +for data_name in ("a", "b", "c"): + for model_name in ("ols", "logit", "linear_prob"): + + @task + def task_fit_model(path_to_data: Path = SRC / f"{data_name}.pkl") + +``` + +1. The level of indentation is not visually pleasing and does not allow us to + sufficiently use every line in the file. + +1. Whenever we add another dimension to our problem, we need to extend every occurrence + of the nested loops. + +But, these problems are more annoying than truly + +The first and most important problem is that + +The first problem is t + +There are couple of problems that arise in these projects. + +The main problem is that with + +In projects where task inputs and outputs can be standardized and general interface + In many projects, tasks are repeated across multiple dimensions that are stacked on top of each other. From 7175e83e9192bd172907c2c5eca425eda46fdecf Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Fri, 12 Jul 2024 16:08:10 +0200 Subject: [PATCH 4/9] Add progress. 
--- ...asks.md => bp_complex_task_repetitions.md} | 28 +++++++++++-------- .../bp_structure_of_task_files.md | 2 +- docs/source/how_to_guides/index.md | 2 +- .../repeating_tasks_with_different_inputs.md | 3 +- .../bp_complex_task_repetitions/example.py | 19 +++++++++++++ 5 files changed, 40 insertions(+), 14 deletions(-) rename docs/source/how_to_guides/{bp_scaling_tasks.md => bp_complex_task_repetitions.md} (88%) create mode 100644 docs_src/how_to_guides/bp_complex_task_repetitions/example.py diff --git a/docs/source/how_to_guides/bp_scaling_tasks.md b/docs/source/how_to_guides/bp_complex_task_repetitions.md similarity index 88% rename from docs/source/how_to_guides/bp_scaling_tasks.md rename to docs/source/how_to_guides/bp_complex_task_repetitions.md index e1a3d277..6e7e16ff 100644 --- a/docs/source/how_to_guides/bp_scaling_tasks.md +++ b/docs/source/how_to_guides/bp_complex_task_repetitions.md @@ -1,29 +1,35 @@ -# Scaling tasks +# Complex task repetitions - \[ \] Write about adding another dimension. - \[ \] Write about adding another level. - \[ \] Write about executing subsets of tasks. -- \[ \] Write about grouping by one dimension´or aggregating. +- \[ \] Write about grouping by one dimension or aggregating. In projects where task inputs and outputs are sufficiently standardized, it is possible to make extensive use of task repetition. A common pattern is to write multiple loops around a task function where each loop -stands for a different dimension. A dimension, for example, represents different -datasets or model specifications to analyze the datasets. +stands for a different dimension. A dimension might represent different datasets or +model specifications to analyze the datasets like in the following example. + +```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example.py +``` There is nothing wrong with using nested loops for simpler projects that are clearly defined in scope. 
But, often they are just the start of looking at a problem from -different angles and soon you want to add more dimensions. +different angles. + +For more complex projects, you are quickly running into a couple of problems. -Adding another loop in a lot of places in your project is cumbersome and the increased -indentation is visually displeasing. +- You need to add the nested loops in a lot of places. +- Every dimension adds another level of indentation which is not aesthetically pleasing. +- Adding another dimension leads to a lot of changes in many places. +- It becomes cumbersome to manage the unique ids of the repeated tasks. -It is not the most serious problem, though. More importantly, it becomes cumbersome to -reference dependencies of products and to set unique identifiers for tasks. The latter -is important to execute only subsets of the project. +The rest of the guide lays out a pattern that -How do we solve these problems? Here is a brief explanation of the solution. +To solve these problems, the pattern laid out in the rest of the article proved to be +helpful. 1. Create objects to define every dimension in the project. A dimension can be characterized by a single value like a {class}`~pathlib.Path`, an diff --git a/docs/source/how_to_guides/bp_structure_of_task_files.md b/docs/source/how_to_guides/bp_structure_of_task_files.md index 857f6479..84e16789 100644 --- a/docs/source/how_to_guides/bp_structure_of_task_files.md +++ b/docs/source/how_to_guides/bp_structure_of_task_files.md @@ -14,7 +14,7 @@ are looking for orientation or inspiration, here are some tips. module is for. ```{seealso} - The only exception might be for {doc}`repetitions `. + The only exception might be for {doc}`repetitions `. 
``` - The purpose of the task function is to handle IO operations like loading and saving diff --git a/docs/source/how_to_guides/index.md b/docs/source/how_to_guides/index.md index 8f0e9f47..53068ee0 100644 --- a/docs/source/how_to_guides/index.md +++ b/docs/source/how_to_guides/index.md @@ -42,5 +42,5 @@ maxdepth: 1 bp_structure_of_a_research_project bp_structure_of_task_files bp_templates_and_projects -bp_scaling_tasks +bp_complex_task_repetitions ``` diff --git a/docs/source/tutorials/repeating_tasks_with_different_inputs.md b/docs/source/tutorials/repeating_tasks_with_different_inputs.md index 750435d6..136152ed 100644 --- a/docs/source/tutorials/repeating_tasks_with_different_inputs.md +++ b/docs/source/tutorials/repeating_tasks_with_different_inputs.md @@ -291,7 +291,8 @@ for id_, kwargs in ID_TO_KWARGS.items(): def task_create_random_data(i, produces): ... ``` -The {doc}`best-practices guide on parametrizations <../how_to_guides/bp_scaling_tasks>` +The +{doc}`best-practices guide on parametrizations <../how_to_guides/bp_complex_task_repetitions>` goes into even more detail on how to scale parametrizations. ## A warning on globals diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py new file mode 100644 index 00000000..d0893d7a --- /dev/null +++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py @@ -0,0 +1,19 @@ +from pathlib import Path +from typing import Annotated + +from pytask import Product +from pytask import task + +SRC = Path(__file__).parent +BLD = SRC / "bld" + + +for data_name in ("a", "b", "c"): + for model_name in ("ols", "logit", "linear_prob"): + + @task + def task_fit_model( + path_to_data: Path = SRC / f"{data_name}.pkl", + path_to_model: Annotated[Path, Product] = BLD + / f"{data_name}-{model_name}.pkl", + ) -> None: ... 
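[Editorial note on the example added in this patch: the nested loops in `example.py` enumerate the cartesian product of the two dimensions, one task per (dataset, model) pair. As a stdlib-only sketch — no pytask required, and the names simply mirror the example — the set of generated combinations looks like this:]

```python
from itertools import product

# Dimensions from example.py: three datasets crossed with three models.
DATA_NAMES = ("a", "b", "c")
MODEL_NAMES = ("ols", "logit", "linear_prob")

# One id per combination, in the "<model>-<data>" form the guide later
# uses for explicit task ids.
task_ids = [f"{model}-{data}" for data, model in product(DATA_NAMES, MODEL_NAMES)]

assert len(task_ids) == 9  # 3 datasets x 3 models = 9 repetitions
assert len(set(task_ids)) == 9  # every id is unique
```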
From a828c217c5c50fe2ac2e959f5e386896a5ea4ce0 Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Sun, 14 Jul 2024 16:35:22 +0200 Subject: [PATCH 5/9] FIx. --- .../bp_complex_task_repetitions.md | 226 ++++-------------- .../bp_complex_task_repetitions/example.py | 2 +- .../example_improved.py | 14 ++ .../bp_complex_task_repetitions/experiment.py | 37 +++ docs_src/how_to_guides/bp_scaling_tasks_1.py | 20 -- docs_src/how_to_guides/bp_scaling_tasks_2.py | 39 --- docs_src/how_to_guides/bp_scaling_tasks_3.py | 18 -- docs_src/how_to_guides/bp_scaling_tasks_4.py | 36 --- 8 files changed, 100 insertions(+), 292 deletions(-) create mode 100644 docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py create mode 100644 docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_1.py delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_2.py delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_3.py delete mode 100644 docs_src/how_to_guides/bp_scaling_tasks_4.py diff --git a/docs/source/how_to_guides/bp_complex_task_repetitions.md b/docs/source/how_to_guides/bp_complex_task_repetitions.md index 6e7e16ff..68e44569 100644 --- a/docs/source/how_to_guides/bp_complex_task_repetitions.md +++ b/docs/source/how_to_guides/bp_complex_task_repetitions.md @@ -1,209 +1,79 @@ # Complex task repetitions -- \[ \] Write about adding another dimension. -- \[ \] Write about adding another level. -- \[ \] Write about executing subsets of tasks. -- \[ \] Write about grouping by one dimension or aggregating. +{doc}`Task repetitions <../tutorials/repeating_tasks_with_different_inputs>` are amazing +if you want to execute lots of tasks while not repeating yourself in code. -In projects where task inputs and outputs are sufficiently standardized, it is possible -to make extensive use of task repetition. 
+But, in any bigger project, repetitions can become hard to maintain because there are +multiple layers or dimensions of repetition. -A common pattern is to write multiple loops around a task function where each loop -stands for a different dimension. A dimension might represent different datasets or -model specifications to analyze the datasets like in the following example. - -```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example.py -``` - -There is nothing wrong with using nested loops for simpler projects that are clearly -defined in scope. But, often they are just the start of looking at a problem from -different angles. - -For more complex projects, you are quickly running into a couple of problems. - -- You need to add the nested loops in a lot of places. -- Every dimension adds another level of indentation which is not aesthetically pleasing. -- Adding another dimension leads to a lot of changes in many places. -- It becomes cumbersome to manage the unique ids of the repeated tasks. - -The rest of the guide lays out a pattern that - -To solve these problems, the pattern laid out in the rest of the article proved to be -helpful. - -1. Create objects to define every dimension in the project. A dimension can be - characterized by a single value like a {class}`~pathlib.Path`, an - {class}`~enum.Enum`, or a {class}`~typing.NamedTuple` or - {func}`~dataclasses.dataclass` if more fields are needed. - -1. Create an object like a {class}`~typing.NamedTuple` or a - {func}`~dataclasses.dataclass` that has one attribute for each dimension. For lack of - a better name, we will call this unit an experiment. - - The experiment combines the information provided by each dimension to create a unique - identifier for each experiment and the names or paths of dependencies and products - for each task. - -To make the idea more tangible, let us focus on an example. 
+Here you find some tips on how to set up your project such that adding dimensions and +increasing dimensions becomes much easier. ## Example -Let us assume we have a project with multiple datasets and model specifications that -should be fitted to the data. - -The datasets are created by the task from the -{doc}`tutorials <../tutorials/defining_dependencies_products>` parametrized with -different coefficients. - -Below that is the task that fits different models to the datasets using a double loop. - -```python -from pathlib import Path -from pytask import task, Product - - -SRC = Path(__file__).parent -BLD = SRC / "bld" - - - - - -for dat - - - -for data_name in ("a", "b", "c"): - for model_name in ("ols", "logit", "linear_prob"): - - @task - def task_fit_model(path_to_data: Path = SRC / f"{data_name}.pkl") +You can write multiple loops around a task function where each loop stands for a +different dimension. A dimension might represent different datasets or model +specifications to analyze the datasets like in the following example. The task arguments +are derived from the dimensions. +```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example.py +--- +caption: task_example.py +--- ``` -1. The level of indentation is not visually pleasing and does not allow us to - sufficiently use every line in the file. - -1. Whenever we add another dimension to our problem, we need to extend every occurrence - of the nested loops. - -But, these problems are more annoying than truly - -The first and most important problem is that - -The first problem is t - -There are couple of problems that arise in these projects. - -The main problem is that with - -In projects where task inputs and outputs can be standardized and general interface - -In many projects, tasks are repeated across multiple dimensions that are stacked on top -of each other. 
- -For example, take a project that there are four ways to simulate data and there are -three different models that should be fitted on each dataset. - -Assuming there is a high-level interface to simulate data, we can loop over the task for -simulating data four times with different arguments. - -Assuming there is a high-level interface to fit models to data, - -Assuming that you can easily switch the model the model fitting can be done in a taskThe -cartesian product of all steps combined comprises twelve differently fitted models. +There is nothing wrong with using nested loops for simpler projects. But, often projects +are growing over time and you run into these problems. -This guide shows an approach to organizing your tasks that can be best described as -flattening the loops. +- When you add a new task, you need to duplicate the nested loops in another module. +- When you add a dimension, you need to touch multiple files in your project and add + another loop and level of indentation. -## The data catalog +## Solution -First of all, we need to create a data catalog in a `config.py` in your project. +The main idea for the solution is quickly explained. We will, first, formalize +dimensions into objects and, secondly, combine them in one object such that we only have +to iterate over instances of this object in a single loop. -The data catalog plays a key role in managing lots of repetitions of tasks because it +We will start by defining the dimensions using {class}`~typing.NamedTuple` or +{func}`~dataclasses.dataclass`. -## Scalability +Then, we will define the object that holds both pieces of information together and for +the lack of a better name, we will call it an experiment. -Let us dive right into the aforementioned example. We start with one dataset `data.csv`. -Then, we will create four different specifications of the data and, finally, fit three -different models to each specification. - -This is the structure of the project. 
- -```text -my_project -├───pyproject.toml -│ -├───src -│ └───my_project -│ ├────config.py -│ │ -│ ├───data -│ │ └────data.csv -│ │ -│ ├───data_preparation -│ │ ├────__init__.py -│ │ ├────config.py -│ │ └────task_prepare_data.py -│ │ -│ └───estimation -│ ├────__init__.py -│ ├────config.py -│ └────task_estimate_models.py -│ -├───.pytask -│ └────... -│ -└───bld +```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py +--- +caption: config.py +--- ``` -The folder structure, the main `config.py` which holds `SRC` and `BLD`, and the tasks -follow the same structure advocated throughout the tutorials. +There are some things to be said. -New are the local configuration files in each subfolder of `my_project`, which contain -objects shared across tasks. For example, `config.py` holds the paths to the processed -data and the names of the data sets. - -```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_1.py -``` +- The names on each dimension need to be unique and ensure that by combining them for + the name of the experiment, we get a unique and descriptive id. +- Dimensions might need more attributes than just a name, like paths, or other arguments + for the task. Add them. -The task file `task_prepare_data.py` uses these objects to build the repetitions. +Next, we will use these newly defined data structures and see how our tasks change when +we use them. -```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_2.py +```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py +--- +caption: task_example.py +--- ``` -All arguments for the loop and the {func}`@task ` decorator are built -within a function to keep the logic in one place and the module's namespace clean. +As you see, we replaced -Ids are used to make the task {ref}`ids ` more descriptive and to simplify their -selection with {ref}`expressions `. Here is an example of the task ids with -an explicit id. 
+## Using the `DataCatalog` -``` -# With id -.../my_project/data_preparation/task_prepare_data.py::task_prepare_data[data_0] -``` +## Adding another dimension -Next, we move to the estimation to see how we can build another repetition on top. +## Adding another level -```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_3.py -``` - -In the local configuration, we define `ESTIMATIONS` which combines the information on -data and model. The dictionary's key can be used as a task id whenever the estimation is -involved. It allows triggering all tasks related to one estimation - estimation, -figures, tables - with one command. - -```console -pytask -k linear_probability_data_0 -``` - -And here is the task file. - -```{literalinclude} ../../../docs_src/how_to_guides/bp_scaling_tasks_4.py -``` +## Executing a subset -Replicating this pattern across a project allows a clean way to define repetitions. +## Grouping and aggregating ## Extending repetitions diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py index d0893d7a..3e3bf14e 100644 --- a/docs_src/how_to_guides/bp_complex_task_repetitions/example.py +++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example.py @@ -11,7 +11,7 @@ for data_name in ("a", "b", "c"): for model_name in ("ols", "logit", "linear_prob"): - @task + @task(id=f"{model_name}-{data_name}") def task_fit_model( path_to_data: Path = SRC / f"{data_name}.pkl", path_to_model: Annotated[Path, Product] = BLD diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py new file mode 100644 index 00000000..741d2c19 --- /dev/null +++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py @@ -0,0 +1,14 @@ +from pathlib import Path +from typing import Annotated + +from myproject.config import EXPERIMENTS +from pytask import Product +from pytask 
import task + +for experiment in EXPERIMENTS: + + @task(id=experiment.name) + def task_fit_model( + path_to_data: experiment.dataset.path, + path_to_model: Annotated[Path, Product] = experiment.path, + ) -> None: ... diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py b/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py new file mode 100644 index 00000000..002c669e --- /dev/null +++ b/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py @@ -0,0 +1,37 @@ +from pathlib import Path +from typing import NamedTuple + +SRC = Path(__file__).parent +BLD = SRC / "bld" + + +class Dataset(NamedTuple): + name: str + + @property + def path(self) -> Path: + return SRC / f"{self.name}.pkl" + + +class Model(NamedTuple): + name: str + + +DATASETS = [Dataset("a"), Dataset("b"), Dataset("c")] +MODELS = [Model("ols"), Model("logit"), Model("linear_prob")] + + +class Experiment(NamedTuple): + dataset: Dataset + model: Model + + @property + def name(self) -> str: + return f"{self.model.name}-{self.dataset.name}" + + @property + def path(self) -> Path: + return BLD / f"{self.name}.pkl" + + +EXPERIMENTS = [Experiment(dataset, model) for dataset in DATASETS for model in MODELS] diff --git a/docs_src/how_to_guides/bp_scaling_tasks_1.py b/docs_src/how_to_guides/bp_scaling_tasks_1.py deleted file mode 100644 index 52d6ea61..00000000 --- a/docs_src/how_to_guides/bp_scaling_tasks_1.py +++ /dev/null @@ -1,20 +0,0 @@ -# Content of config.py -from pathlib import Path - -from my_project.config import BLD -from my_project.config import SRC - -DATA = { - "data_0": {"subset": "subset_1"}, - "data_1": {"subset": "subset_2"}, - "data_2": {"subset": "subset_3"}, - "data_3": {"subset": "subset_4"}, -} - - -def path_to_input_data(name: str) -> Path: - return SRC / "data" / "data.csv" - - -def path_to_processed_data(name: str) -> Path: - return BLD / "data" / f"processed_{name}.pkl" diff --git a/docs_src/how_to_guides/bp_scaling_tasks_2.py 
b/docs_src/how_to_guides/bp_scaling_tasks_2.py deleted file mode 100644 index f31cfc64..00000000 --- a/docs_src/how_to_guides/bp_scaling_tasks_2.py +++ /dev/null @@ -1,39 +0,0 @@ -# Content of task_prepare_data.py -from pathlib import Path - -from my_project.data_preparation.config import DATA -from my_project.data_preparation.config import path_to_input_data -from my_project.data_preparation.config import path_to_processed_data -from pandas import pd -from pytask import Product -from pytask import task -from typing_extensions import Annotated - - -def _create_parametrization(data: list[str]) -> dict[str, Path]: - id_to_kwargs = {} - for data_name, kwargs in data.items(): - id_to_kwargs[data_name] = { - "path_to_input_data": path_to_input_data(data_name), - "path_to_processed_data": path_to_processed_data(data_name), - **kwargs, - } - - return id_to_kwargs - - -_ID_TO_KWARGS = _create_parametrization(DATA) - - -for id_, kwargs in _ID_TO_KWARGS.items(): - - @task(id=id_, kwargs=kwargs) - def task_prepare_data( - path_to_input_data: Path, - subset: str, - path_to_processed_data: Annotated[Path, Product], - ) -> None: - df = pd.read_csv(path_to_input_data) - # ... transform the data. 
- subset = df.loc[df["subset"].eq(subset)] - subset.to_pickle(path_to_processed_data) diff --git a/docs_src/how_to_guides/bp_scaling_tasks_3.py b/docs_src/how_to_guides/bp_scaling_tasks_3.py deleted file mode 100644 index 1e2103d4..00000000 --- a/docs_src/how_to_guides/bp_scaling_tasks_3.py +++ /dev/null @@ -1,18 +0,0 @@ -# Content of config.py -from pathlib import Path - -from my_project.config import BLD -from my_project.data_preparation.config import DATA - -_MODELS = ["linear_probability", "logistic_model", "decision_tree"] - - -ESTIMATIONS = { - f"{data_name}_{model_name}": {"model": model_name, "data": data_name} - for model_name in _MODELS - for data_name in DATA -} - - -def path_to_estimation_result(name: str) -> Path: - return BLD / "estimation" / f"estimation_{name}.pkl" diff --git a/docs_src/how_to_guides/bp_scaling_tasks_4.py b/docs_src/how_to_guides/bp_scaling_tasks_4.py deleted file mode 100644 index a6c66539..00000000 --- a/docs_src/how_to_guides/bp_scaling_tasks_4.py +++ /dev/null @@ -1,36 +0,0 @@ -# Content of task_estimate_models.py -from pathlib import Path - -from my_project.data_preparation.config import path_to_processed_data -from my_project.estimations.config import ESTIMATIONS -from my_project.estimations.config import path_to_estimation_result -from pytask import Product -from pytask import task -from typing_extensions import Annotated - - -def _create_parametrization( - estimations: dict[str, dict[str, str]], -) -> dict[str, str | Path]: - id_to_kwargs = {} - for name, config in estimations.items(): - id_to_kwargs[name] = { - "path_to_data": path_to_processed_data(config["data"]), - "model": config["model"], - "path_to_estimation": path_to_estimation_result(name), - } - - return id_to_kwargs - - -_ID_TO_KWARGS = _create_parametrization(ESTIMATIONS) - - -for id_, kwargs in _ID_TO_KWARGS.items(): - - @task(id=id_, kwargs=kwargs) - def task_estmate_models( - path_to_data: Path, model: str, path_to_estimation: Annotated[Path, Product] - ) -> 
None: - if model == "linear_probability": - ... From 8bf6b11ab4e4507c66eb56483b87c6eecfdea31f Mon Sep 17 00:00:00 2001 From: Tobias Raabe Date: Fri, 19 Jul 2024 22:28:32 +0200 Subject: [PATCH 6/9] Finish guide. --- .../bp_complex_task_repetitions.md | 82 ++++++++++++++----- .../{experiment.py => config.py} | 8 +- .../example_improved.py | 7 +- 3 files changed, 71 insertions(+), 26 deletions(-) rename docs_src/how_to_guides/bp_complex_task_repetitions/{experiment.py => config.py} (81%) diff --git a/docs/source/how_to_guides/bp_complex_task_repetitions.md b/docs/source/how_to_guides/bp_complex_task_repetitions.md index 68e44569..46c0ff3b 100644 --- a/docs/source/how_to_guides/bp_complex_task_repetitions.md +++ b/docs/source/how_to_guides/bp_complex_task_repetitions.md @@ -32,16 +32,23 @@ are growing over time and you run into these problems. ## Solution The main idea for the solution is quickly explained. We will, first, formalize -dimensions into objects and, secondly, combine them in one object such that we only have -to iterate over instances of this object in a single loop. - -We will start by defining the dimensions using {class}`~typing.NamedTuple` or +dimensions into objects using {class}`~typing.NamedTuple` or {func}`~dataclasses.dataclass`. -Then, we will define the object that holds both pieces of information together and for -the lack of a better name, we will call it an experiment. +Secondly, we will combine dimensions in multi-dimensional objects such that we only have +to iterate over instances of this object in a single loop. Here and for the lack of a +better name, we will call the object an experiment. + +Lastly, we will also use the {class}`~pytask.DataCatalog` to not be bothered with +defining paths. 
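+[Editorial note: the role the data catalog plays here can be sketched with a plain
+mapping from entry names to storage locations, so tasks refer to keys instead of
+spelling out paths. A rough stdlib analogy — `ToyCatalog` is hypothetical; the real
+{class}`~pytask.DataCatalog` manages pytask nodes, not bare paths:]
+
+```python
+from pathlib import Path
+
+
+class ToyCatalog:
+    """Map entry names to files under one build directory (analogy only)."""
+
+    def __init__(self, root: Path) -> None:
+        self.root = root
+
+    def __getitem__(self, name: str) -> Path:
+        return self.root / f"{name}.pkl"
+
+
+catalog = ToyCatalog(Path("bld"))
+
+# Tasks only ever mention the key, never the concrete path.
+assert catalog["ols-a-fitted-model"] == Path("bld/ols-a-fitted-model.pkl")
+```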
-```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py +```{seealso} +If you have not learned about the {class}`~pytask.DataCatalog` yet, start with the +{doc}`tutorial <../tutorials/using_a_data_catalog>` and continue with the +{doc}`how-to guide `. +``` + +```{literalinclude} ../../../docs_src/how_to_guides/bp_complex_task_repetitions/config.py --- caption: config.py --- @@ -49,10 +56,11 @@ caption: config.py There are some things to be said. -- The names on each dimension need to be unique and ensure that by combining them for - the name of the experiment, we get a unique and descriptive id. -- Dimensions might need more attributes than just a name, like paths, or other arguments - for the task. Add them. +- The `.name` attributes on each dimension need to return unique names and to ensure + that by combining them for the name of the experiment, we get a unique and descriptive + id. +- Dimensions might need more attributes than just a name, like paths, keys for the data + catalog, or other arguments for the task. Next, we will use these newly defined data structures and see how our tasks change when we use them. @@ -63,21 +71,55 @@ caption: task_example.py --- ``` -As you see, we replaced +As you see, we lost a level of indentation and we moved all the generations of names and +paths to the dimensions and multi-dimensional objects. -## Using the `DataCatalog` +## Adding another level -## Adding another dimension +Extending a dimension by another level is usually quickly done. For example, if we have +another model that we want to fit to the data, we extend `MODELS` which will +automatically lead to all downstream tasks being created. -## Adding another level +```{code-block} python +--- +caption: config.py +--- +... +MODELS = [Model("ols"), Model("logit"), Model("linear_prob"), Model("new_model")] +... 
+```
+
+Of course, you might need to alter `task_fit_model` so that it can handle the new model
+as well as the existing ones. This is where it pays off to use a high-level interface
+that handles all models with a single
+`fitted_model = fit_model(data=data, model_name=model_name)` call and returns fitted
+models with a common interface.
 
 ## Executing a subset
 
-## Grouping and aggregating
+What if you want to execute a subset of tasks, for example, all tasks related to a model
+or a dataset?
+
+When you are using the `.name` attributes of the dimensions and multi-dimensional
+objects like in the example above, you ensure that the names of the dimensions are
+included in the ids of all downstream tasks.
+
+Thus, you can simply call pytask with the following expression to execute all tasks
+related to the logit model.
+
+```console
+pytask -k logit
+```
+
+```{seealso}
+Expressions and markers for selecting tasks are explained in
+{doc}`../tutorials/selecting_tasks`.
+```
 
 ## Extending repetitions
 
-Some parametrized tasks are costly to run - costly in terms of computing power, memory,
-or time. Users often extend repetitions triggering all repetitions to be rerun. Thus,
-use the {func}`@pytask.mark.persist ` decorator, which is explained
-in more detail in this {doc}`tutorial <../tutorials/making_tasks_persist>`.
+Some repeated tasks are costly to run - costly in terms of computing power, memory, or
+runtime. If you change a task module, you might accidentally trigger all other tasks in
+the module to be rerun. Use the {func}`@pytask.mark.persist `
+decorator, which is explained in more detail in this
+{doc}`tutorial <../tutorials/making_tasks_persist>`.
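The high-level `fit_model(data=data, model_name=model_name)` interface referred to in the guide could be sketched as a small dispatcher. This is a hypothetical illustration, not part of pytask or of the patched docs: the `_fit_ols` routine and the `_FITTERS` registry are invented here, and a real project would register its actual estimation routines instead.

```python
from typing import Any, Callable


def _fit_ols(data: list[tuple[float, float]]) -> dict[str, Any]:
    # Hypothetical stand-in for a real OLS routine: a one-dimensional
    # least-squares fit returning slope and intercept.
    n = len(data)
    mean_x = sum(x for x, _ in data) / n
    mean_y = sum(y for _, y in data) / n
    cov = sum((x - mean_x) * (y - mean_y) for x, y in data)
    var = sum((x - mean_x) ** 2 for x, _ in data)
    slope = cov / var
    return {"model": "ols", "slope": slope, "intercept": mean_y - slope * mean_x}


# Registry mapping model names to fitting routines. Supporting a new model is
# a one-line change here; the task code calling fit_model stays untouched.
_FITTERS: dict[str, Callable[..., dict[str, Any]]] = {"ols": _fit_ols}


def fit_model(data: list[tuple[float, float]], model_name: str) -> dict[str, Any]:
    if model_name not in _FITTERS:
        raise ValueError(f"Unknown model: {model_name!r}")
    return _FITTERS[model_name](data)
```

Because every fitter returns the same kind of object, downstream tasks such as `task_fit_model` do not need to branch on the model name.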
diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py b/docs_src/how_to_guides/bp_complex_task_repetitions/config.py
similarity index 81%
rename from docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py
rename to docs_src/how_to_guides/bp_complex_task_repetitions/config.py
index 002c669e..f22041ff 100644
--- a/docs_src/how_to_guides/bp_complex_task_repetitions/experiment.py
+++ b/docs_src/how_to_guides/bp_complex_task_repetitions/config.py
@@ -1,9 +1,13 @@
 from pathlib import Path
 from typing import NamedTuple
 
+from pytask import DataCatalog
+
 SRC = Path(__file__).parent
 BLD = SRC / "bld"
 
+data_catalog = DataCatalog()
+
 
 class Dataset(NamedTuple):
     name: str
@@ -30,8 +34,8 @@ def name(self) -> str:
         return f"{self.model.name}-{self.dataset.name}"
 
     @property
-    def path(self) -> Path:
-        return BLD / f"{self.name}.pkl"
+    def fitted_model_name(self) -> str:
+        return f"{self.name}-fitted-model"
 
 
 EXPERIMENTS = [Experiment(dataset, model) for dataset in DATASETS for model in MODELS]
diff --git a/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py b/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
index 741d2c19..930b9658 100644
--- a/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
+++ b/docs_src/how_to_guides/bp_complex_task_repetitions/example_improved.py
@@ -1,8 +1,8 @@
-from pathlib import Path
 from typing import Annotated
+from typing import Any
 
 from myproject.config import EXPERIMENTS
-from pytask import Product
+from myproject.config import data_catalog
 from pytask import task
 
 for experiment in EXPERIMENTS:
@@ -10,5 +10,4 @@
     @task(id=experiment.name)
     def task_fit_model(
         path_to_data: experiment.dataset.path,
-        path_to_model: Annotated[Path, Product] = experiment.path,
-    ) -> None: ...
+    ) -> Annotated[Any, data_catalog[experiment.fitted_model_name]]: ...
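Stripped of the pytask-specific pieces, the pattern that the new `config.py` introduces can be reproduced with the standard library alone. The sketch below mirrors the patched file; the dataset and model names are placeholders, and only the combination logic matters: unique per-dimension names compose into a unique experiment id, which in turn derives the data-catalog key.

```python
from typing import NamedTuple


class Dataset(NamedTuple):
    name: str


class Model(NamedTuple):
    name: str


class Experiment(NamedTuple):
    dataset: Dataset
    model: Model

    @property
    def name(self) -> str:
        # Unique names per dimension combine into a unique experiment id.
        return f"{self.model.name}-{self.dataset.name}"

    @property
    def fitted_model_name(self) -> str:
        # Key under which the fitted model is stored in the data catalog.
        return f"{self.name}-fitted-model"


DATASETS = [Dataset("a"), Dataset("b")]
MODELS = [Model("ols"), Model("logit")]

# The cartesian product of all dimensions; tasks loop over this single list.
EXPERIMENTS = [Experiment(dataset, model) for dataset in DATASETS for model in MODELS]
```

Adding a level to either `DATASETS` or `MODELS` grows `EXPERIMENTS`, and with it the set of repeated tasks, automatically.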
From eaf819b5c92e9a90012367f432f7c0b821adf0df Mon Sep 17 00:00:00 2001
From: Tobias Raabe
Date: Fri, 19 Jul 2024 22:35:17 +0200
Subject: [PATCH 7/9] to changes.

---
 docs/source/changes.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/changes.md b/docs/source/changes.md
index 6b13c8ec..cb79be8f 100644
--- a/docs/source/changes.md
+++ b/docs/source/changes.md
@@ -5,7 +5,7 @@ chronological order. Releases follow [semantic versioning](https://semver.org/)
 releases are available on [PyPI](https://pypi.org/project/pytask) and
 [Anaconda.org](https://anaconda.org/conda-forge/pytask).
 
-## 0.5.1 - 2024-xx-xx
+## 0.5.1 - 2024-07-19
 
 - {pull}`616` redesigns the guide on "Scaling Tasks".
 - {pull}`617` fixes an interaction with provisional nodes and `@mark.persist`.

From 6b7e0f313c494792f063aff1c9392c1856daa5ec Mon Sep 17 00:00:00 2001
From: Tobias Raabe
Date: Fri, 19 Jul 2024 22:35:58 +0200
Subject: [PATCH 8/9] Fix.

---
 docs/source/changes.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/changes.md b/docs/source/changes.md
index cb79be8f..375d7a6e 100644
--- a/docs/source/changes.md
+++ b/docs/source/changes.md
@@ -7,7 +7,7 @@ releases are available on [PyPI](https://pypi.org/project/pytask) and
 
 ## 0.5.1 - 2024-07-19
 
-- {pull}`616` redesigns the guide on "Scaling Tasks".
+- {pull}`616` and {pull}`632` redesign the guide on "Scaling Tasks".
 - {pull}`617` fixes an interaction with provisional nodes and `@mark.persist`.
 - {pull}`618` ensures that `root_dir` of `DirectoryNode` is created before the task is
   executed.

From 68c62531124ee70c2d0be6416cf066df892004ee Mon Sep 17 00:00:00 2001
From: Tobias Raabe
Date: Fri, 19 Jul 2024 22:59:03 +0200
Subject: [PATCH 9/9] Fix.

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3104f2a7..025f574c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -72,7 +72,7 @@ test = [
     "aiohttp",  # For HTTPPath tests.
"coiled", ] -typing = ["mypy>=1.9.0", "nbqa[mypy]>=1.8.5"] +typing = ["mypy>=1.9.0,<1.11", "nbqa[mypy]>=1.8.5"] [project.urls] Changelog = "https://pytask-dev.readthedocs.io/en/stable/changes.html" @@ -186,6 +186,7 @@ disallow_untyped_defs = true no_implicit_optional = true warn_redundant_casts = true warn_unused_ignores = true +disable_error_code = ["import-untyped"] [[tool.mypy.overrides]] module = "tests.*"