Skip to content

Commit 9ce7a69

Browse files
namusyaka authored and nigeltao committed
html: implement ParseWithOptions and ParseFragmentWithOptions
This commit introduces ParseOption, a type for configuring a parser, and implements two functions that depend on it: ParseWithOptions and ParseFragmentWithOptions. It also introduces ParseOptionEnableScripting, which sets the scripting flag.

Fixes golang/go#16318

Change-Id: Ie7fd7d8ce286e22e7f57182fc2ce353bce578db6
Reviewed-on: https://go-review.googlesource.com/c/net/+/174157
Reviewed-by: Nigel Tao <[email protected]>
1 parent 4829fb1 commit 9ce7a69

File tree

2 files changed: 43 additions and 26 deletions.

html/parse.go

Lines changed: 38 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2300,6 +2300,33 @@ func (p *parser) parse() error {
23002300
//
23012301
// The input is assumed to be UTF-8 encoded.
23022302
func Parse(r io.Reader) (*Node, error) {
2303+
return ParseWithOptions(r)
2304+
}
2305+
2306+
// ParseFragment parses a fragment of HTML and returns the nodes that were
2307+
// found. If the fragment is the InnerHTML for an existing element, pass that
2308+
// element in context.
2309+
//
2310+
// It has the same intricacies as Parse.
2311+
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2312+
return ParseFragmentWithOptions(r, context)
2313+
}
2314+
2315+
// ParseOption configures a parser.
2316+
type ParseOption func(p *parser)
2317+
2318+
// ParseOptionEnableScripting configures the scripting flag.
2319+
// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2320+
//
2321+
// By default, scripting is enabled.
2322+
func ParseOptionEnableScripting(enable bool) ParseOption {
2323+
return func(p *parser) {
2324+
p.scripting = enable
2325+
}
2326+
}
2327+
2328+
// ParseWithOptions is like Parse, with options.
2329+
func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
23032330
p := &parser{
23042331
tokenizer: NewTokenizer(r),
23052332
doc: &Node{
@@ -2309,19 +2336,20 @@ func Parse(r io.Reader) (*Node, error) {
23092336
framesetOK: true,
23102337
im: initialIM,
23112338
}
2339+
2340+
for _, f := range opts {
2341+
f(p)
2342+
}
2343+
23122344
err := p.parse()
23132345
if err != nil {
23142346
return nil, err
23152347
}
23162348
return p.doc, nil
23172349
}
23182350

2319-
// ParseFragment parses a fragment of HTML and returns the nodes that were
2320-
// found. If the fragment is the InnerHTML for an existing element, pass that
2321-
// element in context.
2322-
//
2323-
// It has the same intricacies as Parse.
2324-
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2351+
// ParseFragmentWithOptions is like ParseFragment, with options.
2352+
func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
23252353
contextTag := ""
23262354
if context != nil {
23272355
if context.Type != ElementNode {
@@ -2345,6 +2373,10 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
23452373
context: context,
23462374
}
23472375

2376+
for _, f := range opts {
2377+
f(p)
2378+
}
2379+
23482380
root := &Node{
23492381
Type: ElementNode,
23502382
DataAtom: a.Html,

html/parse_test.go

Lines changed: 5 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ func TestParser(t *testing.T) {
228228
t.Fatal(err)
229229
}
230230

231-
err = testParseCase(text, want, context, Parse)
231+
err = testParseCase(text, want, context)
232232

233233
if err != nil {
234234
t.Errorf("%s test #%d %q, %s", tf, i, text, err)
@@ -250,22 +250,7 @@ func TestParserWithoutScripting(t *testing.T) {
250250
| <img>
251251
| src="https://golang.org/doc/gopher/doc.png"
252252
`
253-
err := testParseCase(text, want, "", func(r io.Reader) (*Node, error) {
254-
p := &parser{
255-
tokenizer: NewTokenizer(r),
256-
doc: &Node{
257-
Type: DocumentNode,
258-
},
259-
scripting: false,
260-
framesetOK: true,
261-
im: initialIM,
262-
}
263-
err := p.parse()
264-
if err != nil {
265-
return nil, err
266-
}
267-
return p.doc, nil
268-
})
253+
err := testParseCase(text, want, "", ParseOptionEnableScripting(false))
269254

270255
if err != nil {
271256
t.Errorf("test with scripting is disabled, %q, %s", text, err)
@@ -276,7 +261,7 @@ func TestParserWithoutScripting(t *testing.T) {
276261
// pass, it returns an error that explains the failure.
277262
// text is the HTML to be parsed, want is a dump of the correct parse tree,
278263
// and context is the name of the context node, if any.
279-
func testParseCase(text, want, context string, parseFunc func(r io.Reader) (*Node, error)) (err error) {
264+
func testParseCase(text, want, context string, opts ...ParseOption) (err error) {
280265
defer func() {
281266
if x := recover(); x != nil {
282267
switch e := x.(type) {
@@ -290,7 +275,7 @@ func testParseCase(text, want, context string, parseFunc func(r io.Reader) (*Nod
290275

291276
var doc *Node
292277
if context == "" {
293-
doc, err = parseFunc(strings.NewReader(text))
278+
doc, err = ParseWithOptions(strings.NewReader(text), opts...)
294279
if err != nil {
295280
return err
296281
}
@@ -300,7 +285,7 @@ func testParseCase(text, want, context string, parseFunc func(r io.Reader) (*Nod
300285
DataAtom: atom.Lookup([]byte(context)),
301286
Data: context,
302287
}
303-
nodes, err := ParseFragment(strings.NewReader(text), contextNode)
288+
nodes, err := ParseFragmentWithOptions(strings.NewReader(text), contextNode, opts...)
304289
if err != nil {
305290
return err
306291
}

0 commit comments

Comments
 (0)