From b014dbb9927bc1f7c67028eb85354418b043bcfb Mon Sep 17 00:00:00 2001
From: Edoardo Baldi <edoardob90@gmail.com>
Date: Mon, 12 May 2025 22:46:26 +0200
Subject: [PATCH 1/4] Some quality-of-life enhancements to the toc.py script

Add proper error handling, logging, command-line options, and improved
documentation. Clean up anchor generation, add a version string, and
enhance CLI help text with better descriptions and examples.
---
 tutorial/toc.py | 182 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 159 insertions(+), 23 deletions(-)

diff --git a/tutorial/toc.py b/tutorial/toc.py
index c1a382ed..444a36dd 100755
--- a/tutorial/toc.py
+++ b/tutorial/toc.py
@@ -1,39 +1,86 @@
 #!/usr/bin/env python
+# ruff: noqa G004
 """CLI script to build a table of contents for an IPython notebook"""
 
 import argparse as ap
+import logging
 import pathlib
 import re
+import sys
 from collections import namedtuple
 
 import nbformat
 from nbformat import NotebookNode
 
+__version__ = "0.1.1"
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+logger = logging.getLogger("toc")
+
 TocEntry = namedtuple("TocEntry", ["level", "text", "anchor"])
 
 
 def extract_markdown_cells(notebook: NotebookNode) -> str:
-    """Extract the markdown cells from a notebook"""
+    """Extract the markdown cells from a notebook
+
+    Args:
+        notebook: A notebook object
+
+    Returns:
+        str: Concatenated content of all markdown cells
+    """
     return "\n".join(
         [cell.source for cell in notebook.cells if cell.cell_type == "markdown"]
     )
 
 
 def extract_toc(notebook: str) -> list[TocEntry]:
-    """Extract the table of contents from a markdown string"""
+    """Extract the table of contents from a markdown string
+
+    Parses markdown headings (lines starting with #) and converts them to TOC entries.
+    Each entry includes the heading level, text, and an anchor derived from the text.
+
+    Args:
+        notebook: String containing markdown content
+
+    Returns:
+        list[TocEntry]: List of table of contents entries
+    """
     toc = []
     line_re = re.compile(r"(#+)\s+(.+)")
+    line_num = 0
+
     for line in notebook.splitlines():
+        line_num += 1
         if groups := re.match(line_re, line):
-            heading, text, *_ = groups.groups()
-            level = len(heading)
-            anchor = "-".join(text.replace("`", "").split())
-            toc.append(TocEntry(level, text, anchor))
+            try:
+                heading, text, *_ = groups.groups()
+                level = len(heading)
+
+                # Clean the text to make a proper anchor
+                clean_text = text.replace("`", "")
+                # Remove any other special characters that might break anchors
+                clean_text = re.sub(r"[^\w\s-]", "", clean_text)
+                anchor = "-".join(clean_text.lower().split())
+
+                toc.append(TocEntry(level, text, anchor))
+                logger.debug(f"Found heading (level {level}): {text}")
+            except Exception as e:
+                logger.warning(f"Error processing heading at line {line_num}: {e}")
+
     return toc
 
 
 def markdown_toc(toc: list[TocEntry]) -> str:
-    """Build a string representation of the toc as a nested markdown list"""
+    """Build a string representation of the toc as a nested markdown list
+
+    Args:
+        toc: List of TocEntry objects
+
+    Returns:
+        str: Markdown-formatted table of contents with proper indentation
+    """
     lines = []
     for entry in toc:
         line = f"{'  ' * entry.level}- [{entry.text}](#{entry.anchor})"
@@ -41,33 +88,69 @@ def markdown_toc(toc: list[TocEntry]) -> str:
     return "\n".join(lines)
 
 
-def build_toc(nb_path: pathlib.Path, placeholder: str = "[TOC]") -> NotebookNode:
-    """Build a table of contents for a notebook and insert it at the location of a placeholder"""
+def build_toc(
+    nb_path: pathlib.Path,
+    placeholder: str = "[TOC]",
+    toc_header: str = "# Table of Contents",
+) -> tuple[NotebookNode, bool]:
+    """Build a table of contents for a notebook and insert it at the location of a placeholder
+
+    Args:
+        nb_path: Path to the notebook file
+        placeholder: The text to replace with the generated TOC (default: "[TOC]")
+        toc_header: The header text to use for the TOC (default: "# Table of Contents")
+
+    Returns:
+        tuple[NotebookNode, bool]: The notebook with TOC inserted and a boolean indicating if placeholder was found
+    """
     # Read the notebook
-    nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT)
+    try:
+        nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT)
+    except Exception as e:
+        logger.error(f"Failed to read notebook '{nb_path}': {e}")
+        raise
+
     md_cells = extract_markdown_cells(nb_obj)
 
     # Build tree
     toc_tree = extract_toc(md_cells)
 
+    if not toc_tree:
+        logger.warning(f"No headings found in notebook '{nb_path}'")
+
     # Build toc representation
     toc_repr = markdown_toc(toc_tree)
 
-    # Insert it a the location of a placeholder
-    toc_header = "# Table of Contents"
+    # Insert it at the location of a placeholder
+    toc_replaced = False
 
     for cell in nb_obj.cells:
         if cell.source.startswith((placeholder, toc_header)):
             cell.source = f"{toc_header}\n{toc_repr}"
             cell.cell_type = "markdown"
+            toc_replaced = True
+            break
+
+    if not toc_replaced:
+        logger.warning(
+            f"Placeholder '{placeholder}' or heading '{toc_header}' not found in notebook"
+        )
 
-    return nb_obj
+    return nb_obj, toc_replaced
 
 
 def main():
     """CLI entry point"""
     parser = ap.ArgumentParser(
-        description="Build a table of contents for an IPython notebook"
+        description="Build a table of contents for an IPython notebook",
+        epilog="""
+        This script extracts headings from markdown cells in a Jupyter notebook and
+        generates a markdown-formatted table of contents. The TOC is inserted into
+        the notebook at the location of a placeholder (default: '[TOC]') or where
+        a '# Table of Contents' heading exists. Links in the TOC point to notebook
+        anchors created from the heading text.
+        """,
+        formatter_class=ap.RawDescriptionHelpFormatter,
     )
     parser.add_argument("notebook", type=str, help="Path to the notebook to process")
     parser.add_argument(
@@ -80,22 +163,75 @@ def main():
         default=False,
         help="Force overwrite of original notebook",
     )
+    parser.add_argument(
+        "--placeholder",
+        "-p",
+        type=str,
+        default="[TOC]",
+        help="Placeholder text to replace with the TOC (default: '[TOC]')",
+    )
+    parser.add_argument(
+        "--header",
+        type=str,
+        default="# Table of Contents",
+        help="Header text for the TOC (default: '# Table of Contents')",
+    )
+    parser.add_argument(
+        "--verbose", "-v", action="store_true", help="Enable verbose output"
+    )
+    parser.add_argument(
+        "--version", action="version", version=f"%(prog)s {__version__}"
+    )
     args = parser.parse_args()
 
-    if not (input_nb := pathlib.Path(args.notebook)).exists():
-        raise FileNotFoundError(input_nb)
-
+    # Set logging level based on verbosity
+    if args.verbose:
+        logger.setLevel(logging.DEBUG)
+
+    # Validate input file
+    try:
+        input_nb = pathlib.Path(args.notebook)
+        if not input_nb.exists():
+            logger.error(f"Input file not found: {input_nb}")
+            sys.exit(1)
+        if not input_nb.is_file():
+            logger.error(f"Input path is not a file: {input_nb}")
+            sys.exit(1)
+    except Exception as e:
+        logger.error(f"Error processing input path: {e}")
+        sys.exit(1)
+
+    # Set output file path
     if args.output is None:
         output_nb = input_nb.with_suffix(".toc.ipynb")
     else:
         output_nb = pathlib.Path(args.output)
 
-    with output_nb.open("w", encoding="utf-8") as file:
-        nbformat.write(build_toc(input_nb), file)
-
-    if args.force:
-        input_nb.unlink()
-        output_nb.rename(input_nb)
+    # Create output directory if it doesn't exist
+    output_nb.parent.mkdir(parents=True, exist_ok=True)
+
+    try:
+        # Generate TOC and write to output file
+        logger.info(f"Processing notebook: {input_nb}")
+        toc_notebook, toc_replaced = build_toc(input_nb, args.placeholder, args.header)
+
+        if not toc_replaced:
+            logger.warning("Skipping output - no placeholder found in notebook")
+            sys.exit(0)  # Exit with success code since it's not an error
+
+        with output_nb.open("w", encoding="utf-8") as file:
+            nbformat.write(toc_notebook, file)
+        logger.info(f"TOC written to: {output_nb}")
+
+        # Handle force option
+        if args.force:
+            logger.info(f"Replacing original notebook with TOC version")
+            input_nb.unlink()
+            output_nb.rename(input_nb)
+            logger.info(f"Original notebook replaced with: {input_nb}")
+    except Exception as e:
+        logger.error(f"Error processing notebook: {e}")
+        sys.exit(1)
 
 
 if __name__ == "__main__":

From ab79959717bdfb007f6114b4c1f8fd97832fa5f6 Mon Sep 17 00:00:00 2001
From: Edoardo Baldi <edoardob90@gmail.com>
Date: Tue, 13 May 2025 07:31:23 +0200
Subject: [PATCH 2/4] [skip ci] Improve logging

Replace f-strings with %-style formatting in logger calls for better
performance. Use logger.exception() for better error reporting with
tracebacks.
---
 tutorial/toc.py | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/tutorial/toc.py b/tutorial/toc.py
index 444a36dd..c020c567 100755
--- a/tutorial/toc.py
+++ b/tutorial/toc.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# ruff: noqa G004
 """CLI script to build a table of contents for an IPython notebook"""
 
 import argparse as ap
@@ -12,7 +11,7 @@
 import nbformat
 from nbformat import NotebookNode
 
-__version__ = "0.1.1"
+__version__ = "0.1.2"
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
@@ -65,9 +64,9 @@ def extract_toc(notebook: str) -> list[TocEntry]:
                 anchor = "-".join(clean_text.lower().split())
 
                 toc.append(TocEntry(level, text, anchor))
-                logger.debug(f"Found heading (level {level}): {text}")
+                logger.debug("Found heading (level %d): %s", level, text)
             except Exception as e:
-                logger.warning(f"Error processing heading at line {line_num}: {e}")
+                logger.warning("Error processing heading at line %d: %s", line_num, e)
 
     return toc
 
@@ -106,8 +105,8 @@ def build_toc(
     # Read the notebook
     try:
         nb_obj: NotebookNode = nbformat.read(nb_path, nbformat.NO_CONVERT)
-    except Exception as e:
-        logger.error(f"Failed to read notebook '{nb_path}': {e}")
+    except Exception:
+        logger.exception("Failed to read notebook '%s'", nb_path)
         raise
 
     md_cells = extract_markdown_cells(nb_obj)
@@ -116,7 +115,7 @@ def build_toc(
     toc_tree = extract_toc(md_cells)
 
     if not toc_tree:
-        logger.warning(f"No headings found in notebook '{nb_path}'")
+        logger.warning("No headings found in notebook '%s'", nb_path)
 
     # Build toc representation
     toc_repr = markdown_toc(toc_tree)
@@ -133,7 +132,9 @@ def build_toc(
 
     if not toc_replaced:
         logger.warning(
-            f"Placeholder '{placeholder}' or heading '{toc_header}' not found in notebook"
+            "Placeholder '%s' or heading '%s' not found in notebook",
+            placeholder,
+            toc_header,
         )
 
     return nb_obj, toc_replaced
@@ -192,13 +193,13 @@ def main():
     try:
         input_nb = pathlib.Path(args.notebook)
         if not input_nb.exists():
-            logger.error(f"Input file not found: {input_nb}")
+            logger.error("Input file not found: %s", input_nb)
             sys.exit(1)
         if not input_nb.is_file():
-            logger.error(f"Input path is not a file: {input_nb}")
+            logger.error("Input path is not a file: %s", input_nb)
             sys.exit(1)
-    except Exception as e:
-        logger.error(f"Error processing input path: {e}")
+    except Exception:
+        logger.exception("Error processing input path")
         sys.exit(1)
 
     # Set output file path
@@ -212,7 +213,7 @@ def main():
 
     try:
         # Generate TOC and write to output file
-        logger.info(f"Processing notebook: {input_nb}")
+        logger.info("Processing notebook: %s", input_nb)
         toc_notebook, toc_replaced = build_toc(input_nb, args.placeholder, args.header)
 
         if not toc_replaced:
@@ -221,16 +222,16 @@ def main():
 
         with output_nb.open("w", encoding="utf-8") as file:
             nbformat.write(toc_notebook, file)
-        logger.info(f"TOC written to: {output_nb}")
+        logger.info("TOC written to: %s", output_nb)
 
         # Handle force option
         if args.force:
-            logger.info(f"Replacing original notebook with TOC version")
+            logger.info("Replacing original notebook with TOC version")
             input_nb.unlink()
             output_nb.rename(input_nb)
-            logger.info(f"Original notebook replaced with: {input_nb}")
-    except Exception as e:
-        logger.error(f"Error processing notebook: {e}")
+            logger.info("Original notebook replaced with: %s", input_nb)
+    except Exception:
+        logger.exception("Error processing notebook")
         sys.exit(1)
 
 

From a6a2c55f1384ef9e9aba88359b93d7dff670f9eb Mon Sep 17 00:00:00 2001
From: Edoardo Baldi <edoardob90@gmail.com>
Date: Tue, 13 May 2025 21:43:02 +0200
Subject: [PATCH 3/4] Fix TOC generation for headings inside code blocks

Upgrade TocEntry from namedtuple to NamedTuple with type hints.
Improve logic for detecting and ignoring markdown headers inside
code blocks to prevent incorrect TOC entries. Also refactor the
file output handling for better organization when using --force.
---
 tutorial/toc.py | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/tutorial/toc.py b/tutorial/toc.py
index c020c567..ec5c682a 100755
--- a/tutorial/toc.py
+++ b/tutorial/toc.py
@@ -6,7 +6,7 @@
 import pathlib
 import re
 import sys
-from collections import namedtuple
+from typing import NamedTuple
 
 import nbformat
 from nbformat import NotebookNode
@@ -17,7 +17,13 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
 logger = logging.getLogger("toc")
 
-TocEntry = namedtuple("TocEntry", ["level", "text", "anchor"])
+
+class TocEntry(NamedTuple):
+    """Table of contents entry"""
+
+    level: int
+    text: str
+    anchor: str
 
 
 def extract_markdown_cells(notebook: NotebookNode) -> str:
@@ -39,6 +45,7 @@ def extract_toc(notebook: str) -> list[TocEntry]:
 
     Parses markdown headings (lines starting with #) and converts them to TOC entries.
     Each entry includes the heading level, text, and an anchor derived from the text.
+    Ignores '#' symbols inside code blocks.
 
     Args:
         notebook: String containing markdown content
@@ -49,9 +56,21 @@ def extract_toc(notebook: str) -> list[TocEntry]:
     toc = []
     line_re = re.compile(r"(#+)\s+(.+)")
     line_num = 0
+    is_code_block = False
 
     for line in notebook.splitlines():
         line_num += 1
+
+        # Check if we're entering or exiting a code block
+        if line.strip().startswith("```"):
+            is_code_block = not is_code_block
+            continue
+
+        # Skip header processing if we're in a code block
+        if is_code_block:
+            continue
+
+        # Process headers only when not in a code block
         if groups := re.match(line_re, line):
             try:
                 heading, text, *_ = groups.groups()
@@ -220,16 +239,21 @@ def main():
             logger.warning("Skipping output - no placeholder found in notebook")
             sys.exit(0)  # Exit with success code since it's not an error
 
-        with output_nb.open("w", encoding="utf-8") as file:
-            nbformat.write(toc_notebook, file)
-        logger.info("TOC written to: %s", output_nb)
+        if not args.force:
+            logger.debug("Ignoring output file: %s", output_nb)
 
-        # Handle force option
-        if args.force:
+            with output_nb.open("w", encoding="utf-8") as file:
+                nbformat.write(toc_notebook, file)
+
+            logger.info("TOC written to: %s", output_nb)
+        else:
             logger.info("Replacing original notebook with TOC version")
-            input_nb.unlink()
-            output_nb.rename(input_nb)
+
+            with input_nb.open("w", encoding="utf-8") as file:
+                nbformat.write(toc_notebook, file)
+
             logger.info("Original notebook replaced with: %s", input_nb)
+
     except Exception:
         logger.exception("Error processing notebook")
         sys.exit(1)

From 2024e0688067c2b428792008fa955e431cbfe5df Mon Sep 17 00:00:00 2001
From: Edoardo Baldi <edoardob90@gmail.com>
Date: Tue, 13 May 2025 21:49:37 +0200
Subject: [PATCH 4/4] Skip TOC header when generating table of contents

---
 tutorial/toc.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tutorial/toc.py b/tutorial/toc.py
index ec5c682a..f95831e9 100755
--- a/tutorial/toc.py
+++ b/tutorial/toc.py
@@ -40,7 +40,7 @@ def extract_markdown_cells(notebook: NotebookNode) -> str:
     )
 
 
-def extract_toc(notebook: str) -> list[TocEntry]:
+def extract_toc(notebook: str, toc_header: str) -> list[TocEntry]:
     """Extract the table of contents from a markdown string
 
     Parses markdown headings (lines starting with #) and converts them to TOC entries.
@@ -49,17 +49,19 @@ def extract_toc(notebook: str) -> list[TocEntry]:
 
     Args:
         notebook: String containing markdown content
+        toc_header: Header text for the table of contents
 
     Returns:
         list[TocEntry]: List of table of contents entries
     """
     toc = []
     line_re = re.compile(r"(#+)\s+(.+)")
-    line_num = 0
     is_code_block = False
 
-    for line in notebook.splitlines():
-        line_num += 1
+    for line_num, line in enumerate(notebook.splitlines(), start=1):
+        # Skip line if contains exactly the toc header
+        if line.strip() == toc_header:
+            continue
 
         # Check if we're entering or exiting a code block
         if line.strip().startswith("```"):
@@ -70,7 +72,7 @@ def extract_toc(notebook: str) -> list[TocEntry]:
         if is_code_block:
             continue
 
-        # Process headers only when not in a code block
+        # Process headers
         if groups := re.match(line_re, line):
             try:
                 heading, text, *_ = groups.groups()
@@ -84,6 +86,7 @@ def extract_toc(notebook: str) -> list[TocEntry]:
 
                 toc.append(TocEntry(level, text, anchor))
                 logger.debug("Found heading (level %d): %s", level, text)
+
             except Exception as e:
                 logger.warning("Error processing heading at line %d: %s", line_num, e)
 
@@ -131,7 +134,7 @@ def build_toc(
     md_cells = extract_markdown_cells(nb_obj)
 
     # Build tree
-    toc_tree = extract_toc(md_cells)
+    toc_tree = extract_toc(md_cells, toc_header)
 
     if not toc_tree:
         logger.warning("No headings found in notebook '%s'", nb_path)