Skip to content

Commit 9c3c300

Browse files
Archmonger and rmorshea authored
html_to_vdom transform to remove html/body but preserve head content (#832)
* Insert head content into html_to_vdom
* more efficient node appending
* add changelog
* nodes -> body node
* parsed_document -> html_node
* remove useless has_root_node variable
* fix comment
* del_html_body_transform
* fix type hint errors
* type hint fixes 2
* remove unusable head and body tags
* uno mas
* import future annotations
* fix docs warnings
* clean up last warning
* fix docstrings
* del_html_body_transform docstrings
* re-add head API
* fix changelog PR links
* docstring cleanup
* Better type hint
* Revert "Better type hint"

  This reverts commit d78afdb.
* more concise transform implementation
* fix merge error
* merge changelog better
* annotate as VdomDict

Co-authored-by: Ryan Morshead <[email protected]>
1 parent 98a3d1e commit 9c3c300

File tree

4 files changed

+73
-40
lines changed

4 files changed

+73
-40
lines changed

docs/source/about/changelog.rst

+7-5
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,23 @@ more info, see the :ref:`Contributor Guide <Creating a Changelog Entry>`.
2323
Unreleased
2424
----------
2525

26-
**Removed**
27-
28-
- :pull:`840` - Remove ``IDOM_FEATURE_INDEX_AS_DEFAULT_KEY`` option
29-
- :pull:`835` - ``serve_static_files`` option from backend configuration
30-
3126
**Added**
3227

3328
- :pull:`835` - Ability to customize the ``<head>`` element of IDOM's built-in client.
3429
- :pull:`835` - ``vdom_to_html`` utility function.
3530
- :pull:`843` - Ability to subscribe to changes that are made to mutable options.
31+
- :pull:`832` - ``del_html_head_body_transform`` to remove ``<html>``, ``<head>``, and ``<body>`` while preserving children.
3632
- :pull:`699` - Support for form element serialization
3733

3834
**Fixed**
3935

4036
- :issue:`582` - ``IDOM_DEBUG_MODE`` is now mutable and can be changed at runtime
37+
- :pull:`832` - Fix ``html_to_vdom`` improperly removing ``<html>``, ``<head>``, and ``<body>`` nodes.
38+
39+
**Removed**
40+
- :pull:`832` - Removed ``idom.html.body`` as it is currently unusable due to technological limitations, and thus not needed.
41+
- :pull:`840` - Remove ``IDOM_FEATURE_INDEX_AS_DEFAULT_KEY`` option
42+
- :pull:`835` - ``serve_static_files`` option from backend configuration
4143

4244

4345
v0.41.0

src/idom/html.py

-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
1515
**Content sectioning**
1616
17-
- :func:`body`
1817
- :func:`address`
1918
- :func:`article`
2019
- :func:`aside`
@@ -189,7 +188,6 @@ def _(*children: Any, key: Key | None = None) -> VdomDict:
189188
title = make_vdom_constructor("title")
190189

191190
# Content sectioning
192-
body = make_vdom_constructor("body")
193191
address = make_vdom_constructor("address")
194192
article = make_vdom_constructor("article")
195193
aside = make_vdom_constructor("aside")

src/idom/utils.py

+28-31
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Any, Callable, Generic, Iterable, TypeVar, cast
66

77
from lxml import etree
8-
from lxml.html import fragments_fromstring, tostring
8+
from lxml.html import fromstring, tostring
99

1010
import idom
1111
from idom.core.types import VdomDict
@@ -85,7 +85,7 @@ def html_to_vdom(
8585
using a ``key=...`` attribute within your HTML tag.
8686
8787
Parameters:
88-
source:
88+
html:
8989
The raw HTML as a string
9090
transforms:
9191
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
@@ -99,15 +99,15 @@ def html_to_vdom(
9999
raise TypeError(f"Expected html to be a string, not {type(html).__name__}")
100100

101101
# Parse the HTML string into a single lxml.etree root node
102-
parser = etree.HTMLParser(
103-
remove_comments=True,
104-
remove_pis=True,
105-
remove_blank_text=True,
106-
recover=not strict,
107-
)
108102
try:
109-
nodes: list[etree._Element] = fragments_fromstring(
110-
html, no_leading_text=True, parser=parser
103+
root_node: etree._Element = fromstring(
104+
html.strip(),
105+
parser=etree.HTMLParser(
106+
remove_comments=True,
107+
remove_pis=True,
108+
remove_blank_text=True,
109+
recover=not strict,
110+
),
111111
)
112112
except etree.XMLSyntaxError as e:
113113
if not strict:
@@ -119,25 +119,8 @@ def html_to_vdom(
119119
"you can disable the strict parameter on html_to_vdom().\n"
120120
"Otherwise, repair your broken HTML and try again."
121121
) from e
122-
has_root_node = len(nodes) == 1
123-
124-
# Find or create a root node
125-
if has_root_node:
126-
root_node = nodes[0]
127-
else:
128-
# etree.Element requires a non-empty tag - we correct this below
129-
root_node = etree.Element("TEMP", None, None)
130-
for child in nodes:
131-
root_node.append(child)
132122

133-
# Convert the lxml node to a VDOM dict
134-
vdom = _etree_to_vdom(root_node, transforms)
135-
136-
# Change the artificially created root node to a React Fragment, instead of a div
137-
if not has_root_node:
138-
vdom["tagName"] = ""
139-
140-
return vdom
123+
return _etree_to_vdom(root_node, transforms)
141124

142125

143126
class HTMLParseError(etree.LxmlSyntaxError): # type: ignore[misc]
@@ -147,10 +130,10 @@ class HTMLParseError(etree.LxmlSyntaxError): # type: ignore[misc]
147130
def _etree_to_vdom(
148131
node: etree._Element, transforms: Iterable[_ModelTransform]
149132
) -> VdomDict:
150-
"""Recusively transform an lxml etree node into a DOM model
133+
"""Transform an lxml etree node into a DOM model
151134
152135
Parameters:
153-
source:
136+
node:
154137
The ``lxml.etree._Element`` node
155138
transforms:
156139
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
@@ -162,7 +145,7 @@ def _etree_to_vdom(
162145
f"Expected node to be a etree._Element, not {type(node).__name__}"
163146
)
164147

165-
# This will recursively call _etree_to_vdom() on all children
148+
# Recursively call _etree_to_vdom() on all children
166149
children = _generate_vdom_children(node, transforms)
167150

168151
# Convert the lxml node to a VDOM dict
@@ -289,6 +272,20 @@ def _hypen_to_camel_case(string: str) -> str:
289272
return first.lower() + remainder.title().replace("-", "")
290273

291274

275+
def del_html_head_body_transform(vdom: VdomDict) -> VdomDict:
    """Transform intended for use with ``html_to_vdom``.

    Removes ``<html>``, ``<head>``, and ``<body>`` while preserving their
    children. Any other node is returned unchanged.

    Parameters:
        vdom:
            The VDOM dictionary to transform.

    Returns:
        A fragment (a VDOM dictionary whose ``tagName`` is ``""``) holding the
        children of the removed element, or ``vdom`` itself when its tag is not
        one of ``html``, ``head``, or ``body``.
    """
    if vdom["tagName"] in {"html", "body", "head"}:
        # A node may carry no "children" key at all (e.g. an empty ``<head>``),
        # so fall back to an empty list instead of raising ``KeyError``.
        return {"tagName": "", "children": vdom.get("children", [])}
    return vdom
287+
288+
292289
def _vdom_attr_to_html_str(key: str, value: Any) -> tuple[str, str]:
293290
if key == "style":
294291
if isinstance(value, dict):

tests/test_utils.py

+38-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44

55
import idom
66
from idom import html
7-
from idom.utils import HTMLParseError, html_to_vdom, vdom_to_html
7+
from idom.utils import (
8+
HTMLParseError,
9+
del_html_head_body_transform,
10+
html_to_vdom,
11+
vdom_to_html,
12+
)
813

914

1015
def test_basic_ref_behavior():
@@ -144,7 +149,7 @@ def test_html_to_vdom_with_no_parent_node():
144149
source = "<p>Hello</p><div>World</div>"
145150

146151
expected = {
147-
"tagName": "",
152+
"tagName": "div",
148153
"children": [
149154
{"tagName": "p", "children": ["Hello"]},
150155
{"tagName": "div", "children": ["World"]},
@@ -154,6 +159,37 @@ def test_html_to_vdom_with_no_parent_node():
154159
assert html_to_vdom(source) == expected
155160

156161

162+
def test_del_html_body_transform():
163+
source = """
164+
<!DOCTYPE html>
165+
<html lang="en">
166+
167+
<head>
168+
<title>My Title</title>
169+
</head>
170+
171+
<body><h1>Hello World</h1></body>
172+
173+
</html>
174+
"""
175+
176+
expected = {
177+
"tagName": "",
178+
"children": [
179+
{
180+
"tagName": "",
181+
"children": [{"tagName": "title", "children": ["My Title"]}],
182+
},
183+
{
184+
"tagName": "",
185+
"children": [{"tagName": "h1", "children": ["Hello World"]}],
186+
},
187+
],
188+
}
189+
190+
assert html_to_vdom(source, del_html_head_body_transform) == expected
191+
192+
157193
SOME_OBJECT = object()
158194

159195

0 commit comments

Comments
 (0)