Skip to content

Commit 21ef660

Browse files
committed
internal/testing/htmlcheck: replace use of cascadia in In
This cl implements the :nth-child() and :nth-of-type() pseudoclasses for the css selector query function, allowing it to be used for htmlcheck.In. It also replaces the single use of a child combinator '>' in a test with a descendant combinator ' ' so that we don't need to implement the child combinator. The test should have the same behavior. For golang/go#61399 Change-Id: I09d9b8fbcd0eafd37aceb5994515687c85244ef8 Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/542058 kokoro-CI: kokoro <[email protected]> Reviewed-by: Jonathan Amsterdam <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent a7a0e8d commit 21ef660

File tree

4 files changed

+182
-24
lines changed

4 files changed

+182
-24
lines changed

internal/testing/htmlcheck/htmlcheck.go

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"regexp"
1313
"strings"
1414

15-
"github.com/andybalholm/cascadia"
1615
"golang.org/x/net/html"
1716
)
1817

@@ -38,15 +37,9 @@ func Run(reader io.Reader, checker Checker) error {
3837
//
3938
// A nil Checker is valid and always succeeds.
4039
func In(selector string, checkers ...Checker) Checker {
41-
sel := mustParseCascadiaSelector(selector)
40+
sel := mustParseSelector(selector)
4241
return func(n *html.Node) error {
43-
var m *html.Node
44-
// cascadia.Query does not test against its argument node.
45-
if sel.Match(n) {
46-
m = n
47-
} else {
48-
m = cascadia.Query(n, sel)
49-
}
42+
m := query(n, sel)
5043
if m == nil {
5144
return fmt.Errorf("no element matches selector %q", selector)
5245
}
@@ -84,19 +77,6 @@ func check(n *html.Node, Checkers []Checker) error {
8477
return nil
8578
}
8679

87-
// mustParseCascadiaSelector parses the given CSS selector. An empty string
88-
// is treated as "*" (match everything).
89-
func mustParseCascadiaSelector(s string) cascadia.Sel {
90-
if s == "" {
91-
s = "*"
92-
}
93-
sel, err := cascadia.Parse(s)
94-
if err != nil {
95-
panic(fmt.Sprintf("parsing %q: %v", s, err))
96-
}
97-
return sel
98-
}
99-
10080
// mustParseSelector parses the given CSS selector. An empty string
10181
// is treated as matching everything.
10282
func mustParseSelector(s string) *selector {

internal/testing/htmlcheck/query.go

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package htmlcheck
77
import (
88
"errors"
99
"fmt"
10+
"strconv"
1011
"strings"
1112

1213
"golang.org/x/net/html"
@@ -122,13 +123,62 @@ func parse(s string) (*selector, error) {
122123
}
123124
sel.next = next
124125
return sel, nil
126+
case s[0] == ':':
127+
atom, rest, err := parsePseudoClass(s)
128+
if err != nil {
129+
return nil, err
130+
}
131+
sel.atoms = append(sel.atoms, atom)
132+
s = rest
125133
default:
126-
return nil, fmt.Errorf("unexpected character %q in input", s[0])
134+
return nil, fmt.Errorf("unexpected character '%v' in input", string(s[0]))
127135
}
128136
}
129137
return sel, nil
130138
}
131139

140+
// parsePseudoClass parses a :nth-child(n) or :nth-of-type(n). It only supports those functions and
141+
// number arguments to those functions, not even, odd, or an+b.
142+
func parsePseudoClass(s string) (selectorAtom, string, error) {
143+
if s[0] != ':' {
144+
return nil, "", errors.New("expected ':' at beginning of pseudo class")
145+
}
146+
ident, rest := consumeIdentifier(s[1:])
147+
if len(ident) == 0 {
148+
return nil, "", errors.New("expected identifier after : in pseudo class")
149+
}
150+
if ident != "nth-of-type" && ident != "nth-child" {
151+
return nil, "", errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported")
152+
}
153+
s = rest
154+
if len(s) == 0 || s[0] != '(' {
155+
return nil, "", errors.New("expected '(' after :nth-of-type or nth-child")
156+
}
157+
numstr, rest := consumeNumber(s[1:])
158+
if len(numstr) == 0 {
159+
return nil, "", errors.New("only number arguments are supported for :nth-of-type() or :nth-child()")
160+
}
161+
num, err := strconv.Atoi(numstr)
162+
if err != nil {
163+
// This shouldn't ever happen because consumeNumber should only return valid numbers and we
164+
// check that the length is greater than 0.
165+
panic(fmt.Errorf("unexpected parse error for number: %v", err))
166+
}
167+
s = rest
168+
if len(s) == 0 || s[0] != ')' {
169+
return nil, "", errors.New("expected ')' after number argument to nth-of-type or nth-child")
170+
}
171+
rest = s[1:]
172+
switch ident {
173+
case "nth-of-type":
174+
return &nthOfType{num}, rest, nil
175+
case "nth-child":
176+
return &nthChild{num}, rest, nil
177+
default:
178+
panic("we should only have allowed nth-of-type or nth-child up to this point")
179+
}
180+
}
181+
132182
// parseAttributeSelector parses an attribute selector of the form [attribute-name="attribute=value"]
133183
func parseAttributeSelector(s string) (*attributeSelector, string, error) {
134184
if s[0] != '[' {
@@ -192,6 +242,15 @@ func consumeIdentifier(s string) (letters, rest string) {
192242
return s[:i], s[i:]
193243
}
194244

245+
// consumeNumber consumes and returns a (0-9)+ number at the beginning
246+
// of the given string, and the rest of the string.
247+
func consumeNumber(s string) (letters, rest string) {
248+
i := 0
249+
for ; i < len(s) && isNumber(s[i]); i++ {
250+
}
251+
return s[:i], s[i:]
252+
}
253+
195254
func isAscii(s string) bool {
196255
for i := 0; i < len(s); i++ {
197256
if s[i] > 127 {
@@ -268,3 +327,65 @@ func (s *classSelector) match(n *html.Node) bool {
268327
}
269328
return false
270329
}
330+
331+
// nthOfType implements the :nth-of-type() pseudoclass.
332+
type nthOfType struct {
333+
n int
334+
}
335+
336+
func (s *nthOfType) match(n *html.Node) bool {
337+
if n.Type != html.ElementNode || n.Parent == nil {
338+
return false
339+
}
340+
curChild := n.Parent.FirstChild
341+
i := 0
342+
for {
343+
if curChild.Type == html.ElementNode && curChild.Data == n.Data {
344+
i++
345+
if i == s.n {
346+
break
347+
}
348+
}
349+
if curChild.NextSibling == nil {
350+
break
351+
}
352+
curChild = curChild.NextSibling
353+
}
354+
if i != s.n {
355+
// there were fewer than n children of this element type.
356+
return false
357+
}
358+
return curChild == n
359+
}
360+
361+
// nthChild implements the :nth-child() pseudoclass
362+
type nthChild struct {
363+
n int
364+
}
365+
366+
func (s *nthChild) match(n *html.Node) bool {
367+
if n.Type != html.ElementNode || n.Parent == nil {
368+
return false
369+
}
370+
curChild := n.Parent.FirstChild
371+
// Advance to next element node.
372+
for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
373+
curChild = curChild.NextSibling
374+
}
375+
i := 1
376+
for ; i < s.n; i++ {
377+
if curChild.NextSibling == nil {
378+
break
379+
}
380+
curChild = curChild.NextSibling
381+
// Advance to next element node.
382+
for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
383+
curChild = curChild.NextSibling
384+
}
385+
}
386+
if i != s.n {
387+
// there were fewer than n children.
388+
return false
389+
}
390+
return curChild == n
391+
}

internal/testing/htmlcheck/query_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,52 @@ func TestParse(t *testing.T) {
120120
nil,
121121
errors.New("expected ']' at end of attribute selector"),
122122
},
123+
{
124+
`.VulnMain-title:nth-of-type(4)`,
125+
&selector{atoms: []selectorAtom{&classSelector{"VulnMain-title"}, &nthOfType{4}}},
126+
nil,
127+
},
128+
{
129+
`th:nth-child(2)`,
130+
&selector{atoms: []selectorAtom{&elementSelector{"th"}, &nthChild{2}}},
131+
nil,
132+
},
133+
{
134+
`th:(2)`,
135+
nil,
136+
errors.New("expected identifier after : in pseudo class"),
137+
},
138+
{
139+
`th:32(2)`,
140+
nil,
141+
errors.New("expected identifier after : in pseudo class"),
142+
},
143+
{
144+
`th:active`,
145+
nil,
146+
errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported"),
147+
},
148+
{
149+
`th:nth-child`,
150+
nil,
151+
errors.New("expected '(' after :nth-of-type or nth-child"),
152+
},
153+
{
154+
`th:nth-child(odd)`,
155+
nil,
156+
errors.New("only number arguments are supported for :nth-of-type() or :nth-child()"),
157+
},
158+
{
159+
`th:nth-child(14`,
160+
nil,
161+
errors.New("expected ')' after number argument to nth-of-type or nth-child"),
162+
},
163+
// We don't support the child combinator. Make sure it returns a parse error.
164+
{
165+
".Documentation-sinceVersion > .Documentation-sinceVersionVersion",
166+
nil,
167+
errors.New("unexpected character '>' in input"),
168+
},
123169
}
124170
for _, tc := range testCases {
125171
sel, err := parse(tc.text)
@@ -156,6 +202,17 @@ func TestQuery(t *testing.T) {
156202
{`<div></div><div><div>wrong</div></div><div id="wrong-id"><div class="my-class">also wrong</div></div><div id="my-id"><div class="wrong-class">still wrong</div></div><div id="my-id"><div class="my-class">match</div></div>`, "div#my-id div.my-class", `<div class="my-class">match</div>`},
157203
{`<a></a><div class="UnitMeta-repo"><a href="foo" title="">link body</a></div>`, ".UnitMeta-repo a", `<a href="foo" title="">link body</a>`},
158204
{`<ul class="UnitFiles-fileList"><li><a href="foo">a.go</a></li></ul>`, ".UnitFiles-fileList a", `<a href="foo">a.go</a>`},
205+
{`<ul><li>first child</li><li>second child</li></ul>`, "li:nth-child(2)", `<li>second child</li>`},
206+
{`<ul> <li>first child</li> <li>second child</li> </ul>`, "li:nth-child(2)", `<li>second child</li>`},
207+
{`<div><div>not counted</div><p class="class">first paragraph</p></div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
208+
{`<div><div>not counted</div> <p class="class">first paragraph</p> </div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
209+
{`<div><div class="class">not counted</div><p class="class">first paragraph</p>`, ".class:nth-of-type(2)", ``},
210+
{`<div><div class="class">not counted</div><p class="class">first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
211+
{`<div><div>not counted</div><p>first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
212+
{`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
213+
{`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p><td>also not counted</td><p>third paragraph</p><p>fourth paragraph</p><p class="class">fifth paragraph</p></div>`, ".class:nth-of-type(5)", `<p class="class">fifth paragraph</p>`},
214+
{`<table class="UnitDirectories-table"><tbody><tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis">Synopsis</th></tr>`, "th:nth-child(1)", "<th>Path</th>"},
215+
{`<table class="UnitDirectories-table"> <tbody> <tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis"> Synopsis </th> </tr>`, "th:nth-child(1)", "<th>Path</th>"},
159216
}
160217
for _, tc := range testCases {
161218
n, err := html.Parse(strings.NewReader(tc.queriedText))

internal/testing/integration/frontend_main_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ func TestFrontendMainPage(t *testing.T) {
5151
in("#S2",
5252
in(".Documentation-sinceVersion", hasText(""))),
5353
in("#String",
54-
in(".Documentation-sinceVersion > .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
54+
in(".Documentation-sinceVersion .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
5555
in("#T",
5656
in(".Documentation-sinceVersion", hasText(""))),
5757
in("#TF",

0 commit comments

Comments
 (0)