From c4031dd535101ae22058375d34f385a24636acb8 Mon Sep 17 00:00:00 2001
From: "joseph.wilson8-nhs" <joseph.wilson8@nhs.net>
Date: Tue, 17 Dec 2024 18:32:27 +0000
Subject: [PATCH 1/6] Adds .vscode/ to .gitignore to exclude Visual Studio Code
 workspace settings from version control

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index a5df42b..377ce2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,3 +170,4 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+.vscode/

From 4ae5bfcf846d3d89b57500e8d30f8d632ea088c7 Mon Sep 17 00:00:00 2001
From: "joseph.wilson8-nhs" <joseph.wilson8@nhs.net>
Date: Tue, 17 Dec 2024 18:32:34 +0000
Subject: [PATCH 2/6] Adds initial implementation of data processing pipeline
 with modules for data input, aggregation, processing, and plotting

---
 code_your_own_pandas_pipeline/aggregations.py | 71 ++++++++++++++++++
 code_your_own_pandas_pipeline/config.py       | 36 ++++++++++
 code_your_own_pandas_pipeline/data_in.py      | 45 ++++++++++++
 code_your_own_pandas_pipeline/pipeline.py     | 40 +++++++++++
 code_your_own_pandas_pipeline/plots.py        | 72 +++++++++++++++++++
 code_your_own_pandas_pipeline/processing.py   | 54 ++++++++++++++
 6 files changed, 318 insertions(+)
 create mode 100644 code_your_own_pandas_pipeline/aggregations.py
 create mode 100644 code_your_own_pandas_pipeline/config.py
 create mode 100644 code_your_own_pandas_pipeline/data_in.py
 create mode 100644 code_your_own_pandas_pipeline/pipeline.py
 create mode 100644 code_your_own_pandas_pipeline/plots.py
 create mode 100644 code_your_own_pandas_pipeline/processing.py

diff --git a/code_your_own_pandas_pipeline/aggregations.py b/code_your_own_pandas_pipeline/aggregations.py
new file mode 100644
index 0000000..6397cc4
--- /dev/null
+++ b/code_your_own_pandas_pipeline/aggregations.py
@@ -0,0 +1,71 @@
+"""
+This module provides functions to pivot and summarize the practice level appointment data.
+"""
+
+import pandas as pd
+from loguru import logger
+
+placeholder_df = pd.DataFrame()
+
+
+def pivot_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Pivot the practice level data.
+
+    Parameters
+    ----------
+    practice_data : pd.DataFrame
+        The practice data.
+
+    Returns
+    -------
+    pd.DataFrame
+        The pivoted data.
+    """
+    logger.info("Pivoting the practice level data.")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df
+
+
+def summarize_monthly_gp_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Summarize the monthly appointments by GP and Appointment Status.
+
+    Parameters
+    ----------
+    pivot_practice_data : pd.DataFrame
+        The pivoted practice data.
+
+    Returns
+    -------
+    pd.DataFrame
+        The summarized data.
+    """
+    logger.info("Summarizing the monthly GP appointments.")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df
+
+
+def summarize_monthly_region_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Summarize the monthly appointments by Region and Appointment Status.
+
+    Parameters
+    ----------
+    pivot_practice_data : pd.DataFrame
+        The pivoted practice data.
+
+    Returns
+    -------
+    pd.DataFrame
+        The summarized data.
+    """
+    logger.info("Summarizing the monthly region appointments.")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df
diff --git a/code_your_own_pandas_pipeline/config.py b/code_your_own_pandas_pipeline/config.py
new file mode 100644
index 0000000..a7a1544
--- /dev/null
+++ b/code_your_own_pandas_pipeline/config.py
@@ -0,0 +1,36 @@
+"""
+Configuration file for the code_your_own_pandas_pipeline package.
+"""
+
+from pathlib import Path
+
+from dotenv import load_dotenv
+from loguru import logger
+
+# Load environment variables from .env file if it exists
+load_dotenv()
+
+# Paths
+PROJ_ROOT = Path(__file__).resolve().parents[1]
+logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
+
+DATA_DIR = (PROJ_ROOT / "data").relative_to(PROJ_ROOT)
+RAW_DATA_DIR = DATA_DIR / "raw"
+INTERIM_DATA_DIR = DATA_DIR / "interim"
+PROCESSED_DATA_DIR = DATA_DIR / "processed"
+EXTERNAL_DATA_DIR = DATA_DIR / "external"
+
+MODELS_DIR = PROJ_ROOT / "models"
+
+REPORTS_DIR = PROJ_ROOT / "reports"
+FIGURES_DIR = REPORTS_DIR / "figures"
+
+# If tqdm is installed, configure loguru with tqdm.write
+# https://github.com/Delgan/loguru/issues/135
+try:
+    from tqdm import tqdm
+
+    logger.remove(0)
+    logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
+except ModuleNotFoundError:
+    pass
diff --git a/code_your_own_pandas_pipeline/data_in.py b/code_your_own_pandas_pipeline/data_in.py
new file mode 100644
index 0000000..e3df794
--- /dev/null
+++ b/code_your_own_pandas_pipeline/data_in.py
@@ -0,0 +1,45 @@
+"""
+This module contains the function to read the mapping and practice crosstab data from the data 
+folder.
+"""
+import pandas as pd
+from loguru import logger
+
+placeholder_df = pd.DataFrame()
+
+
+def read_mapping_data() -> pd.DataFrame:
+    """
+    Read the mapping data from the data folder.
+
+    Returns
+    -------
+    pd.DataFrame
+        The mapping data.
+    """
+    logger.info(f"Reading mapping data from {""}")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df
+
+
+def read_practice_crosstab_data() -> pd.DataFrame:
+    """
+    Read the practice crosstab data from the data folder.
+
+    Returns
+    -------
+    pd.DataFrame
+        The practice crosstab data.
+    """
+    logger.info(f"Reading practice crosstab data from {""}")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df
+
+
+if __name__ == "__main__":
+    read_mapping_data().head()
+    read_practice_crosstab_data().head()
diff --git a/code_your_own_pandas_pipeline/pipeline.py b/code_your_own_pandas_pipeline/pipeline.py
new file mode 100644
index 0000000..baaf377
--- /dev/null
+++ b/code_your_own_pandas_pipeline/pipeline.py
@@ -0,0 +1,40 @@
+"""
+Main pipeline for the code_your_own_pandas_pipeline package.
+"""
+
+from loguru import logger
+
+from code_your_own_pandas_pipeline import aggregations, config, data_in, plots, processing
+
+
+placeholder_df = pd.DataFrame()
+
+
+def main() -> None:
+    """
+    Main function to run the pipeline.
+
+    Returns
+    -------
+    None
+    """
+    logger.level("START", no=15, color="<green><bold>")
+    logger.log("START", "Starting the GP Appointment Data Pipeline")
+
+    data_in.read_mapping_data()
+    data_in.read_practice_crosstab_data()
+
+    processing.tidy_practice_level_data(placeholder_df)
+    processing.merge_mapping_and_practice_data(placeholder_df, placeholder_df)
+
+    aggregations.pivot_practice_level_data(placeholder_df)
+    aggregations.summarize_monthly_gp_appointments(placeholder_df)
+    aggregations.summarize_monthly_region_appointments(placeholder_df)
+
+    plots.plot_monthly_gp_appointments(placeholder_df, "placeholder_str")
+    plots.plot_monthly_region_appointments(placeholder_df, "placeholder_str")
+
+    logger.success("GP Appointment Data Pipeline Completed")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/code_your_own_pandas_pipeline/plots.py b/code_your_own_pandas_pipeline/plots.py
new file mode 100644
index 0000000..faa77fd
--- /dev/null
+++ b/code_your_own_pandas_pipeline/plots.py
@@ -0,0 +1,72 @@
+"""
+This module provides functions for generating and saving plots.
+"""
+
+import pandas as pd
+from loguru import logger
+
+
+def save_plot(plot, output_folder: str, plot_name: str) -> None:
+    """
+    Save the plot to the output folder.
+
+    Parameters
+    ----------
+    plot : matplotlib.pyplot
+        The plot to save.
+    output_folder : str
+        The output folder to save the plot.
+    plot_name : str
+        The plot name.
+
+    Returns
+    -------
+    None
+    """
+    logger.info(f"Saving the plot {plot_name} to {output_folder}.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def plot_monthly_gp_appointments(
+    monthly_gp_appointments: pd.DataFrame, output_folder: str
+) -> None:
+    """
+    Plot the monthly GP appointments.
+
+    Parameters
+    ----------
+    monthly_gp_appointments : pd.DataFrame
+        The monthly GP appointments data.
+    output_folder : str
+        The output folder to save the plots.
+
+    Returns
+    -------
+    None
+    """
+    logger.info("Plotting the monthly GP appointments.")
+
+    logger.warning("This function is not yet implemented.")
+
+
+def plot_monthly_region_appointments(
+    monthly_region_appointments: pd.DataFrame, output_folder: str
+) -> None:
+    """
+    Plot the monthly region appointments.
+
+    Parameters
+    ----------
+    monthly_region_appointments : pd.DataFrame
+        The monthly region appointments data.
+    output_folder : str
+        The output folder to save the plots.
+
+    Returns
+    -------
+    None
+    """
+    logger.info("Plotting the monthly region appointments.")
+
+    logger.warning("This function is not yet implemented.")
\ No newline at end of file
diff --git a/code_your_own_pandas_pipeline/processing.py b/code_your_own_pandas_pipeline/processing.py
new file mode 100644
index 0000000..87b77f0
--- /dev/null
+++ b/code_your_own_pandas_pipeline/processing.py
@@ -0,0 +1,54 @@
+"""
+This module contains the functions to process the mapping and practice crosstab data and merge them.
+"""
+
+import pandas as pd
+from loguru import logger
+
+placeholder_df = pd.DataFrame()
+
+
+def tidy_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
+    """
+    Tidy the practice crosstab data.
+
+    Parameters
+    ----------
+    practice_data : pd.DataFrame
+        The practice crosstab data.
+
+    Returns
+    -------
+    pd.DataFrame
+        The tidy practice crosstab data.
+    """
+    logger.info("Tidying the practice crosstab data.")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df
+
+
+def merge_mapping_and_practice_data(
+    mapping_data: pd.DataFrame, practice_data: pd.DataFrame
+) -> pd.DataFrame:
+    """
+    Merge the mapping and practice data.
+
+    Parameters
+    ----------
+    mapping_data : pd.DataFrame
+        The mapping data.
+    practice_data : pd.DataFrame
+        The practice data.
+
+    Returns
+    -------
+    pd.DataFrame
+        The merged data.
+    """
+    logger.info("Merging the mapping and practice data.")
+
+    logger.warning("This function is not yet implemented.")
+
+    return placeholder_df

From 7b9ce53873d41c0d13b2caf9cd79c82723068128 Mon Sep 17 00:00:00 2001
From: "joseph.wilson8-nhs" <joseph.wilson8@nhs.net>
Date: Tue, 17 Dec 2024 18:42:13 +0000
Subject: [PATCH 3/6] Renames downloaded data file to _data.zip and removes it
 after extraction to streamline data handling

---
 Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 58ec83b..d743dd7 100644
--- a/Makefile
+++ b/Makefile
@@ -106,8 +106,9 @@ create_tests:
 .PHONY: get_data
 get_data:
 	mkdir -p data/raw
-	wget -O data/raw/Practice_Level_Crosstab_Sep_24.zip https://files.digital.nhs.uk/A5/B4AB19/Practice_Level_Crosstab_Sep_24.zip
-	unzip -o data/raw/Practice_Level_Crosstab_Sep_24.zip -d data/raw
+	wget -O data/raw/_data.zip https://files.digital.nhs.uk/A5/B4AB19/Practice_Level_Crosstab_Sep_24.zip
+	unzip -o data/raw/_data.zip -d data/raw
+	rm data/raw/_data.zip
 
 
 #################################################################################
@@ -152,4 +153,4 @@ endef
 export PRINT_HELP_PYSCRIPT
 
 help:
-	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
+	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
\ No newline at end of file

From f824aa98f7477c8ac544df0b390dd0c5d5c8bfe3 Mon Sep 17 00:00:00 2001
From: "joseph.wilson8-nhs" <joseph.wilson8@nhs.net>
Date: Tue, 17 Dec 2024 18:42:23 +0000
Subject: [PATCH 4/6] Removes placeholder return statements from unimplemented
 functions in data processing and aggregation modules

---
 code_your_own_pandas_pipeline/aggregations.py | 6 ------
 code_your_own_pandas_pipeline/data_in.py      | 9 ++-------
 code_your_own_pandas_pipeline/pipeline.py     | 7 ++++---
 code_your_own_pandas_pipeline/plots.py        | 2 +-
 code_your_own_pandas_pipeline/processing.py   | 4 ----
 5 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/code_your_own_pandas_pipeline/aggregations.py b/code_your_own_pandas_pipeline/aggregations.py
index 6397cc4..06d99dc 100644
--- a/code_your_own_pandas_pipeline/aggregations.py
+++ b/code_your_own_pandas_pipeline/aggregations.py
@@ -26,8 +26,6 @@ def pivot_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
 
     logger.warning("This function is not yet implemented.")
 
-    return placeholder_df
-
 
 def summarize_monthly_gp_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
     """
@@ -47,8 +45,6 @@ def summarize_monthly_gp_appointments(pivot_practice_data: pd.DataFrame) -> pd.D
 
     logger.warning("This function is not yet implemented.")
 
-    return placeholder_df
-
 
 def summarize_monthly_region_appointments(pivot_practice_data: pd.DataFrame) -> pd.DataFrame:
     """
@@ -67,5 +63,3 @@ def summarize_monthly_region_appointments(pivot_practice_data: pd.DataFrame) ->
     logger.info("Summarizing the monthly region appointments.")
 
     logger.warning("This function is not yet implemented.")
-
-    return placeholder_df
diff --git a/code_your_own_pandas_pipeline/data_in.py b/code_your_own_pandas_pipeline/data_in.py
index e3df794..d3de9d6 100644
--- a/code_your_own_pandas_pipeline/data_in.py
+++ b/code_your_own_pandas_pipeline/data_in.py
@@ -1,12 +1,11 @@
 """
-This module contains the function to read the mapping and practice crosstab data from the data 
+This module contains the function to read the mapping and practice crosstab data from the data
 folder.
 """
+
 import pandas as pd
 from loguru import logger
 
-placeholder_df = pd.DataFrame()
-
 
 def read_mapping_data() -> pd.DataFrame:
     """
@@ -21,8 +20,6 @@ def read_mapping_data() -> pd.DataFrame:
 
     logger.warning("This function is not yet implemented.")
 
-    return placeholder_df
-
 
 def read_practice_crosstab_data() -> pd.DataFrame:
     """
@@ -37,8 +34,6 @@ def read_practice_crosstab_data() -> pd.DataFrame:
 
     logger.warning("This function is not yet implemented.")
 
-    return placeholder_df
-
 
 if __name__ == "__main__":
     read_mapping_data().head()
diff --git a/code_your_own_pandas_pipeline/pipeline.py b/code_your_own_pandas_pipeline/pipeline.py
index baaf377..64b8d2e 100644
--- a/code_your_own_pandas_pipeline/pipeline.py
+++ b/code_your_own_pandas_pipeline/pipeline.py
@@ -2,10 +2,10 @@
 Main pipeline for the code_your_own_pandas_pipeline package.
 """
 
+import pandas as pd
 from loguru import logger
 
-from code_your_own_pandas_pipeline import aggregations, config, data_in, plots, processing
-
+from code_your_own_pandas_pipeline import aggregations, data_in, plots, processing
 
 placeholder_df = pd.DataFrame()
 
@@ -36,5 +36,6 @@ def main() -> None:
 
     logger.success("GP Appointment Data Pipeline Completed")
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/code_your_own_pandas_pipeline/plots.py b/code_your_own_pandas_pipeline/plots.py
index faa77fd..cf4d9cd 100644
--- a/code_your_own_pandas_pipeline/plots.py
+++ b/code_your_own_pandas_pipeline/plots.py
@@ -69,4 +69,4 @@ def plot_monthly_region_appointments(
     """
     logger.info("Plotting the monthly region appointments.")
 
-    logger.warning("This function is not yet implemented.")
\ No newline at end of file
+    logger.warning("This function is not yet implemented.")
diff --git a/code_your_own_pandas_pipeline/processing.py b/code_your_own_pandas_pipeline/processing.py
index 87b77f0..bac11f3 100644
--- a/code_your_own_pandas_pipeline/processing.py
+++ b/code_your_own_pandas_pipeline/processing.py
@@ -26,8 +26,6 @@ def tidy_practice_level_data(practice_data: pd.DataFrame) -> pd.DataFrame:
 
     logger.warning("This function is not yet implemented.")
 
-    return placeholder_df
-
 
 def merge_mapping_and_practice_data(
     mapping_data: pd.DataFrame, practice_data: pd.DataFrame
@@ -50,5 +48,3 @@ def merge_mapping_and_practice_data(
     logger.info("Merging the mapping and practice data.")
 
     logger.warning("This function is not yet implemented.")
-
-    return placeholder_df

From 6e1eba07a6ea5192ce07452a49de0302e9059869 Mon Sep 17 00:00:00 2001
From: "joseph.wilson8-nhs" <joseph.wilson8@nhs.net>
Date: Tue, 17 Dec 2024 20:24:34 +0000
Subject: [PATCH 5/6] Removes unimplemented function warning and unnecessary
 main block from data input module

---
 code_your_own_pandas_pipeline/data_in.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/code_your_own_pandas_pipeline/data_in.py b/code_your_own_pandas_pipeline/data_in.py
index d3de9d6..0212611 100644
--- a/code_your_own_pandas_pipeline/data_in.py
+++ b/code_your_own_pandas_pipeline/data_in.py
@@ -33,8 +33,3 @@ def read_practice_crosstab_data() -> pd.DataFrame:
     logger.info(f"Reading practice crosstab data from {""}")
 
     logger.warning("This function is not yet implemented.")
-
-
-if __name__ == "__main__":
-    read_mapping_data().head()
-    read_practice_crosstab_data().head()

From b0d615183bc0e6fcdef59fd787597f993bc3658a Mon Sep 17 00:00:00 2001
From: "joseph.wilson8-nhs" <joseph.wilson8@nhs.net>
Date: Tue, 17 Dec 2024 20:27:25 +0000
Subject: [PATCH 6/6] WIP Adds tests for pipeline code

---
 tests/unittests/test_aggregations.py | 212 +++++++++++++++++++++++++++
 tests/unittests/test_data_in.py      | 118 +++++++++++++++
 tests/unittests/test_pipeline.py     |  13 ++
 tests/unittests/test_plots.py        |  13 ++
 tests/unittests/test_processing.py   |  13 ++
 5 files changed, 369 insertions(+)
 create mode 100644 tests/unittests/test_aggregations.py
 create mode 100644 tests/unittests/test_data_in.py
 create mode 100644 tests/unittests/test_pipeline.py
 create mode 100644 tests/unittests/test_plots.py
 create mode 100644 tests/unittests/test_processing.py

diff --git a/tests/unittests/test_aggregations.py b/tests/unittests/test_aggregations.py
new file mode 100644
index 0000000..beb45f1
--- /dev/null
+++ b/tests/unittests/test_aggregations.py
@@ -0,0 +1,212 @@
+"""
+Tests for code_your_own_pandas_pipeline.aggregations
+"""
+
+import pandas as pd
+import pytest
+
+from code_your_own_pandas_pipeline.aggregations import (
+    pivot_practice_level_data,
+    summarize_monthly_gp_appointments,
+    summarize_monthly_region_appointments,
+)
+
+
+@pytest.fixture
+def practice_test_data():
+    return pd.DataFrame(
+        columns=[
+            "APPOINTMENT_MONTH_START_DATE",
+            "GP_NAME",
+            "REGION_NAME",
+            "APPT_STATUS",
+            "COUNT_OF_APPOINTMENTS",
+        ],
+        data=[
+            ["2021-01-01", "Example GP A", "REGION1", "ATTENDED", 1],
+            ["2021-01-01", "Example GP B", "REGION1", "ATTENDED", 4],
+            ["2021-02-01", "Example GP A", "REGION1", "ATTENDED", 7],
+            ["2021-02-01", "Example GP B", "REGION1", "ATTENDED", 10],
+            ["2021-01-01", "Example GP A", "REGION2", "ATTENDED", 1],
+            ["2021-01-01", "Example GP B", "REGION2", "ATTENDED", 4],
+            ["2021-02-01", "Example GP A", "REGION2", "ATTENDED", 7],
+            ["2021-02-01", "Example GP B", "REGION2", "ATTENDED", 10],
+            ["2021-01-01", "Example GP A", "REGION1", "ATTENDED", 1],
+            ["2021-01-01", "Example GP B", "REGION1", "ATTENDED", 4],
+            ["2021-02-01", "Example GP A", "REGION1", "ATTENDED", 7],
+            ["2021-02-01", "Example GP B", "REGION1", "ATTENDED", 10],
+            ["2021-01-01", "Example GP A", "REGION2", "ATTENDED", 1],
+            ["2021-01-01", "Example GP B", "REGION2", "ATTENDED", 4],
+            ["2021-02-01", "Example GP A", "REGION2", "ATTENDED", 7],
+            ["2021-02-01", "Example GP B", "REGION2", "ATTENDED", 10],
+            ["2021-01-01", "Example GP A", "REGION1", "DID NOT ATTEND", 2],
+            ["2021-01-01", "Example GP B", "REGION1", "DID NOT ATTEND", 5],
+            ["2021-02-01", "Example GP A", "REGION1", "DID NOT ATTEND", 8],
+            ["2021-02-01", "Example GP B", "REGION1", "DID NOT ATTEND", 11],
+            ["2021-01-01", "Example GP A", "REGION2", "DID NOT ATTEND", 2],
+            ["2021-01-01", "Example GP B", "REGION2", "DID NOT ATTEND", 5],
+            ["2021-02-01", "Example GP A", "REGION2", "DID NOT ATTEND", 8],
+            ["2021-02-01", "Example GP B", "REGION2", "DID NOT ATTEND", 11],
+            ["2021-01-01", "Example GP A", "REGION1", "DID NOT ATTEND", 2],
+            ["2021-01-01", "Example GP B", "REGION1", "DID NOT ATTEND", 5],
+            ["2021-02-01", "Example GP A", "REGION1", "DID NOT ATTEND", 8],
+            ["2021-02-01", "Example GP B", "REGION1", "DID NOT ATTEND", 11],
+            ["2021-01-01", "Example GP A", "REGION2", "DID NOT ATTEND", 2],
+            ["2021-01-01", "Example GP B", "REGION2", "DID NOT ATTEND", 5],
+            ["2021-02-01", "Example GP A", "REGION2", "DID NOT ATTEND", 8],
+            ["2021-02-01", "Example GP B", "REGION2", "DID NOT ATTEND", 11],
+            ["2021-01-01", "Example GP A", "REGION1", "UNKNOWN", 3],
+            ["2021-01-01", "Example GP B", "REGION1", "UNKNOWN", 6],
+            ["2021-02-01", "Example GP A", "REGION1", "UNKNOWN", 9],
+            ["2021-02-01", "Example GP B", "REGION1", "UNKNOWN", 12],
+            ["2021-01-01", "Example GP A", "REGION2", "UNKNOWN", 3],
+            ["2021-01-01", "Example GP B", "REGION2", "UNKNOWN", 6],
+            ["2021-02-01", "Example GP A", "REGION2", "UNKNOWN", 9],
+            ["2021-02-01", "Example GP B", "REGION2", "UNKNOWN", 12],
+            ["2021-01-01", "Example GP A", "REGION1", "UNKNOWN", 3],
+            ["2021-01-01", "Example GP B", "REGION1", "UNKNOWN", 6],
+            ["2021-02-01", "Example GP A", "REGION1", "UNKNOWN", 9],
+            ["2021-02-01", "Example GP B", "REGION1", "UNKNOWN", 12],
+            ["2021-01-01", "Example GP A", "REGION2", "UNKNOWN", 3],
+            ["2021-01-01", "Example GP B", "REGION2", "UNKNOWN", 6],
+            ["2021-02-01", "Example GP A", "REGION2", "UNKNOWN", 9],
+            ["2021-02-01", "Example GP B", "REGION2", "UNKNOWN", 12],
+        ],
+    )
+
+
+@pytest.fixture
+def practice_pivot_test_data():
+    return pd.DataFrame(
+        columns=[
+            "APPOINTMENT_MONTH_START_DATE",
+            "GP_NAME",
+            "REGION_NAME",
+            "ATTENDED",
+            "DID NOT ATTEND",
+            "UNKNOWN",
+        ],
+        data=[
+            ["2021-01-01", "Example GP A", "REGION1", 1, 2, 3],
+            ["2021-01-01", "Example GP B", "REGION1", 4, 5, 6],
+            ["2021-02-01", "Example GP A", "REGION1", 7, 8, 9],
+            ["2021-02-01", "Example GP B", "REGION1", 10, 11, 12],
+            ["2021-01-01", "Example GP A", "REGION2", 1, 2, 3],
+            ["2021-01-01", "Example GP B", "REGION2", 4, 5, 6],
+            ["2021-02-01", "Example GP A", "REGION2", 7, 8, 9],
+            ["2021-02-01", "Example GP B", "REGION2", 10, 11, 12],
+        ]
+        * 2,
+    )
+
+
+class TestPivotPracticeLevelData:
+    """
+    Tests for the pivot_practice_level_data function.
+    """
+
+    def test_returns_dataframe(self, practice_test_data):
+        """
+        Check that the function returns a DataFrame.
+        """
+        actual = pivot_practice_level_data(practice_test_data)
+        assert isinstance(actual, pd.DataFrame)
+
+    def test_return_not_empty(self, practice_test_data):
+        """
+        Check that the function returns a non-empty DataFrame.
+        """
+        actual = pivot_practice_level_data(practice_test_data)
+        assert not actual.empty
+
+    def test_return_pivoted_data(self, practice_test_data, practice_pivot_test_data):
+        """
+        Check that the function returns the pivoted data (compared with the pivot fixture).
+        """
+        actual = pivot_practice_level_data(practice_test_data)
+        expected = practice_pivot_test_data
+        pd.testing.assert_frame_equal(actual, expected)
+
+
+class TestSummarizeMonthlyGPAppointments:
+    """
+    Tests for the summarize_monthly_gp_appointments function.
+    """
+
+    def test_returns_dataframe(self, practice_pivot_test_data):
+        """
+        Check that the function returns a DataFrame.
+        """
+        actual = summarize_monthly_gp_appointments(practice_pivot_test_data)
+        assert isinstance(actual, pd.DataFrame)
+
+    def test_return_not_empty(self, practice_pivot_test_data):
+        """
+        Check that the function returns a non-empty DataFrame.
+        """
+        actual = summarize_monthly_gp_appointments(practice_pivot_test_data)
+        assert not actual.empty
+
+    def test_return_summarized_data(self, practice_pivot_test_data):
+        """
+        Check that the function returns the appointments summed per month and GP.
+        """
+        actual = summarize_monthly_gp_appointments(practice_pivot_test_data)
+        expected = pd.DataFrame(
+            [
+                ["2021-01-01", "Example GP A", 4, 8, 12],
+                ["2021-01-01", "Example GP B", 16, 20, 24],
+                ["2021-02-01", "Example GP A", 28, 32, 36],
+                ["2021-02-01", "Example GP B", 40, 44, 48],
+            ],
+            columns=[
+                "APPOINTMENT_MONTH_START_DATE",
+                "GP_NAME",
+                "ATTENDED",
+                "DID NOT ATTEND",
+                "UNKNOWN",
+            ],
+        )
+        pd.testing.assert_frame_equal(actual, expected)
+
+
+class TestSummarizeMonthlyRegionAppointments:
+    """
+    Tests for the summarize_monthly_region_appointments function.
+    """
+
+    def test_returns_dataframe(self, practice_pivot_test_data):
+        """
+        Check that the function returns a DataFrame.
+        """
+        actual = summarize_monthly_region_appointments(practice_pivot_test_data)
+        assert isinstance(actual, pd.DataFrame)
+
+    def test_return_not_empty(self, practice_pivot_test_data):
+        """
+        Check that the function returns a non-empty DataFrame.
+        """
+        actual = summarize_monthly_region_appointments(practice_pivot_test_data)
+        assert not actual.empty
+
+    def test_return_summarized_data(self, practice_pivot_test_data):
+        """
+        Check that the function returns the appointments summed per month and region.
+        """
+        actual = summarize_monthly_region_appointments(practice_pivot_test_data)
+        expected = pd.DataFrame(
+            [
+                ["2021-01-01", "REGION1", 10, 14, 18],
+                ["2021-01-01", "REGION2", 10, 14, 18],
+                ["2021-02-01", "REGION1", 34, 38, 42],
+                ["2021-02-01", "REGION2", 34, 38, 42],
+            ],
+            columns=[
+                "APPOINTMENT_MONTH_START_DATE",
+                "REGION_NAME",
+                "ATTENDED",
+                "DID NOT ATTEND",
+                "UNKNOWN",
+            ],
+        )
+        pd.testing.assert_frame_equal(actual, expected)
diff --git a/tests/unittests/test_data_in.py b/tests/unittests/test_data_in.py
new file mode 100644
index 0000000..cbb8b08
--- /dev/null
+++ b/tests/unittests/test_data_in.py
@@ -0,0 +1,118 @@
+"""
+Tests for code_your_own_pandas_pipeline.data_in
+"""
+import pandas as pd
+import numpy as np
+
+from code_your_own_pandas_pipeline.data_in import read_mapping_data, read_practice_crosstab_data
+
+
+class TestReadMappingData:
+    """
+    Tests for the read_mapping_data function.
+    """
+
+    def test_return_type(self):
+        """
+        Test that the read_mapping_data function returns a pandas DataFrame.
+        """
+        actual = read_mapping_data()
+
+        assert isinstance(actual, pd.DataFrame)
+
+    def test_return_not_empty(self):
+        """
+        Tests that the read_mapping_data function returns a non-empty DataFrame
+        """
+        actual = read_mapping_data()
+
+        assert not actual.empty
+
+    def test_shape(self):
+        """
+        Tests that the read_mapping_data function returns a DataFrame with the correct shape.
+        """
+        actual = read_mapping_data()
+
+        assert actual.shape == (6241, 11)
+
+    def test_schema(self):
+        """
+        Tests that the read_mapping_data function returns a DataFrame with the correct schema.
+        """
+        actual = read_mapping_data()
+
+        expected_schema = pd.Series(
+            {
+                "GP_CODE": np.dtype("O"),
+                "GP_NAME": np.dtype("O"),
+                "SUPPLIER": np.dtype("O"),
+                "PCN_CODE": np.dtype("O"),
+                "PCN_NAME": np.dtype("O"),
+                "SUB_ICB_LOCATION_CODE": np.dtype("O"),
+                "SUB_ICB_LOCATION_NAME": np.dtype("O"),
+                "ICB_CODE": np.dtype("O"),
+                "ICB_NAME": np.dtype("O"),
+                "REGION_CODE": np.dtype("O"),
+                "REGION_NAME": np.dtype("O"),
+            }
+        )
+
+        pd.testing.assert_series_equal(actual.dtypes, expected_schema)
+
+
+class TestReadPracticeCrosstabData:
+    """
+    Tests for the read_practice_crosstab_data function.
+    """
+
+    def test_return_type(self):
+        """
+        Test that the read_practice_crosstab_data function returns a pandas DataFrame.
+        """
+        actual = read_practice_crosstab_data()
+
+        assert isinstance(actual, pd.DataFrame)
+
+    def test_return_not_empty(self):
+        """
+        Tests that the read_practice_crosstab_data function returns a non-empty DataFrame
+        """
+        actual = read_practice_crosstab_data()
+
+        assert not actual.empty
+
+    def test_shape(self):
+        """
+        Tests that the read_practice_crosstab_data function returns a DataFrame with the correct shape.
+        """
+        actual = read_practice_crosstab_data()
+
+        assert actual.shape == (2971190, 14)
+
+    def test_schema(self):
+        """
+        Tests that the read_practice_crosstab_data function returns a DataFrame with the correct schema.
+        """
+        actual = read_practice_crosstab_data()
+
+        expected_schema = pd.Series(
+            {
+                "APPOINTMENT_MONTH_START_DATE": np.dtype("O"),
+                "GP_CODE": np.dtype("O"),
+                "GP_NAME": np.dtype("O"),
+                "SUPPLIER": np.dtype("O"),
+                "PCN_CODE": np.dtype("O"),
+                "PCN_NAME": np.dtype("O"),
+                "SUB_ICB_LOCATION_CODE": np.dtype("O"),
+                "SUB_ICB_LOCATION_NAME": np.dtype("O"),
+                "HCP_TYPE": np.dtype("O"),
+                "APPT_MODE": np.dtype("O"),
+                "NATIONAL_CATEGORY": np.dtype("O"),
+                "TIME_BETWEEN_BOOK_AND_APPT": np.dtype("O"),
+                "COUNT_OF_APPOINTMENTS": np.dtype("int64"),
+                "APPT_STATUS": np.dtype("O"),
+            }
+        )
+
+        pd.testing.assert_series_equal(actual.dtypes, expected_schema)
diff --git a/tests/unittests/test_pipeline.py b/tests/unittests/test_pipeline.py
new file mode 100644
index 0000000..59c441a
--- /dev/null
+++ b/tests/unittests/test_pipeline.py
@@ -0,0 +1,13 @@
+"""
+Tests for code_your_own_pandas_pipeline.pipeline
+"""
+import pytest
+
+import code_your_own_pandas_pipeline.pipeline
+
+
+class TestExample:
+    """Example test class"""
+    def test_example(self):
+        """Example test case"""
+        assert True
diff --git a/tests/unittests/test_plots.py b/tests/unittests/test_plots.py
new file mode 100644
index 0000000..c4d1fed
--- /dev/null
+++ b/tests/unittests/test_plots.py
@@ -0,0 +1,13 @@
+"""
+Tests for code_your_own_pandas_pipeline.plots
+"""
+import pytest
+
+import code_your_own_pandas_pipeline.plots
+
+
+class TestExample:
+    """Example test class"""
+    def test_example(self):
+        """Example test case"""
+        assert True
diff --git a/tests/unittests/test_processing.py b/tests/unittests/test_processing.py
new file mode 100644
index 0000000..243dbe9
--- /dev/null
+++ b/tests/unittests/test_processing.py
@@ -0,0 +1,13 @@
+"""
+Tests for code_your_own_pandas_pipeline.processing
+"""
+import pytest
+
+import code_your_own_pandas_pipeline.processing
+
+
+class TestExample:
+    """Example test class"""
+    def test_example(self):
+        """Example test case"""
+        assert True