Skip to content

Commit b7a7663

Browse files
feat(outline-import): Add markdown preprocessing for unsupported BlockNote elements
- Convert H4/H5/H6 headings to compatible formats (H4→H3 with marker, H5→bold with arrow, H6→paragraph with bullet)
- Convert horizontal rules (---, ***, ___) to [DIVIDER_BLOCK] markers
- Preserve task list formatting for proper checkbox rendering
- Add comprehensive unit tests for all conversion cases

This ensures that Outline exports using all six heading levels and other markdown features are imported properly into BlockNote.js, which supports only three heading levels.
1 parent 453b153 commit b7a7663

File tree

2 files changed

+123
-2
lines changed

2 files changed

+123
-2
lines changed

src/backend/core/services/outline_import.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55
import io
66
import mimetypes
7+
import posixpath
78
import re
89
import uuid
910
import zipfile
1011
from typing import Iterable
11-
import posixpath
1212

1313
from django.conf import settings
1414
from django.core.files.storage import default_storage
@@ -19,6 +19,47 @@
1919
from core.services.converter_services import YdocConverter
2020

2121

22+
# Opening/closing fence of a fenced code block: optional indentation, a run of
# three or more backticks or tildes, then an optional info string.
_OUTLINE_FENCE_RE = re.compile(r"^\s*(`{3,}|~{3,})(.*)$")

# Horizontal rule: the same character (-, * or _) three or more times,
# optionally separated by spaces or tabs. Matched against the stripped line.
_OUTLINE_RULE_RE = re.compile(r"([-*_])(?:[ \t]*\1){2,}")


def _preprocess_outline_markdown(markdown: str) -> str:
    """Rewrite Outline markdown constructs that BlockNote.js cannot import.

    The text is processed line by line. Lines inside fenced code blocks
    (``` or ~~~) are copied verbatim so that code samples containing ``#``
    or ``---`` are not altered.

    Conversions applied outside code fences:
    - H4 (``#### ``) → H3 with a trailing `` [H4]`` marker
    - H5 (``##### ``) → bold paragraph with a ``▸`` prefix
    - H6 (``###### ``) → plain paragraph with a ``▪`` prefix
    - Horizontal rules (three or more ``-``, ``*`` or ``_``, optionally
      separated by spaces) → a ``[DIVIDER_BLOCK]`` placeholder line

    Everything else, including task list items (``- [ ]`` / ``- [x]``), is
    passed through unchanged.

    Args:
        markdown: Raw markdown text, with lines separated by ``\\n``.

    Returns:
        The rewritten markdown, with the same number of lines as the input.
    """
    processed_lines = []
    # Fence marker (e.g. "```") that opened the code block we are currently
    # inside, or None when we are in regular markdown.
    open_fence = None

    for line in markdown.split("\n"):
        fence = _OUTLINE_FENCE_RE.match(line)

        if open_fence is not None:
            # Inside a code block: only a fence of the same character, at
            # least as long as the opener and without an info string, ends it.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not fence.group(2).strip()
            ):
                open_fence = None
            processed_lines.append(line)
            continue

        # A backtick fence may not carry backticks in its info string;
        # otherwise the line is inline code such as ```foo```, not a fence.
        if fence and not (fence.group(1)[0] == "`" and "`" in fence.group(2)):
            open_fence = fence.group(1)
            processed_lines.append(line)
            continue

        # Check the longest heading prefix first: '#### ' would not match an
        # H5/H6 line anyway, but this order keeps the intent obvious.
        if line.startswith("###### "):
            processed_lines.append("▪ " + line[7:].strip())
        elif line.startswith("##### "):
            processed_lines.append("**▸ " + line[6:].strip() + "**")
        elif line.startswith("#### "):
            # Keep a visible marker so the original level is not lost.
            processed_lines.append("### " + line[5:].strip() + " [H4]")
        elif _OUTLINE_RULE_RE.fullmatch(line.strip()):
            # Placeholder line standing in for the rule; how it is consumed
            # later is not visible from this function.
            processed_lines.append("[DIVIDER_BLOCK]")
        else:
            # Task list items and all other content need no rewriting.
            processed_lines.append(line)

    return "\n".join(processed_lines)
61+
62+
2263
class OutlineImportError(Exception):
2364
"""Raised when the Outline archive is invalid or unsafe."""
2465

@@ -172,6 +213,9 @@ def replace_img_link(match: re.Match[str]) -> str:
172213

173214
rewritten_md = img_pattern.sub(replace_img_link, raw_md)
174215

216+
# Pre-process markdown to handle Outline-specific content
217+
rewritten_md = _preprocess_outline_markdown(rewritten_md)
218+
175219
try:
176220
ydoc_b64 = converter.convert(
177221
rewritten_md.encode("utf-8"),

src/backend/core/tests/services/test_outline_import_service.py

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88

99
from core import factories
10-
from core.services.outline_import import OutlineImportError, process_outline_zip
10+
from core.services.outline_import import OutlineImportError, process_outline_zip, _preprocess_outline_markdown
1111

1212

1313
pytestmark = pytest.mark.django_db
@@ -50,3 +50,80 @@ def test_process_outline_zip_zip_slip_rejected():
5050
with pytest.raises(OutlineImportError):
5151
process_outline_zip(user, zip_bytes)
5252

53+
54+
def test_preprocess_outline_markdown_heading_conversions():
    """H1-H3 are left untouched while H4, H5 and H6 are converted."""
    markdown = """# H1 Title
## H2 Section
### H3 Subsection
#### H4 Content
##### H5 Detail
###### H6 Note
"""
    result = _preprocess_outline_markdown(markdown)

    # Compare whole lines rather than substrings: "# H1 Title" is also a
    # substring of "## H1 Title", so an `in` check cannot detect a heading
    # whose level was changed by mistake.
    assert result.split("\n") == [
        "# H1 Title",
        "## H2 Section",
        "### H3 Subsection",
        "### H4 Content [H4]",  # H4 converted to H3 with marker
        "**▸ H5 Detail**",  # H5 converted to bold with arrow
        "▪ H6 Note",  # H6 converted to paragraph with bullet
        "",  # trailing newline of the input is preserved
    ]
71+
72+
73+
def test_preprocess_outline_markdown_horizontal_rules():
    """Each of the three horizontal rule spellings becomes a divider marker."""
    source = "\n".join(
        [
            "Content before",
            "---",
            "Content after",
            "***",
            "More content",
            "___",
            "Final content",
        ]
    )
    converted = _preprocess_outline_markdown(source)

    # One marker per rule, and none of the raw rule spellings survive.
    assert converted.count("[DIVIDER_BLOCK]") == 3
    for raw_rule in ("---", "***", "___"):
        assert raw_rule not in converted
88+
89+
90+
def test_preprocess_outline_markdown_task_lists():
    """Task list items, including nested ones, pass through unchanged."""
    # NOTE(review): nested items are assumed to be indented by two spaces;
    # confirm against the fixture used in the repository.
    markdown = """- [ ] Unchecked task
- [x] Checked task
- Regular list item
  - [ ] Nested unchecked
  - [x] Nested checked"""
    result = _preprocess_outline_markdown(markdown)

    assert "- [ ] Unchecked task" in result
    assert "- [x] Checked task" in result
    assert "- Regular list item" in result
    assert "  - [ ] Nested unchecked" in result
    assert "  - [x] Nested checked" in result
    # Nothing in this document needs converting, so the preprocessing step
    # must return it exactly as given (indentation included).
    assert result == markdown
104+
105+
106+
def test_preprocess_outline_markdown_combined():
    """All conversions work together on a realistic document."""
    markdown = """# Main Title
## Section 1
### Subsection
#### Deep Section
Some content here.
---
##### Important Note
This is important.
###### Small detail
- [ ] Task to do
- [x] Completed task
"""
    result = _preprocess_outline_markdown(markdown)

    # Exact line-by-line comparison: this also verifies that untouched lines
    # (H1-H3, paragraphs, task items) are preserved, which substring checks
    # such as `"# Main Title" in result` cannot guarantee.
    assert result.split("\n") == [
        "# Main Title",
        "## Section 1",
        "### Subsection",
        "### Deep Section [H4]",
        "Some content here.",
        "[DIVIDER_BLOCK]",
        "**▸ Important Note**",
        "This is important.",
        "▪ Small detail",
        "- [ ] Task to do",
        "- [x] Completed task",
        "",
    ]
129+

0 commit comments

Comments
 (0)