@@ -7,6 +7,7 @@ package htmlcheck
7
7
import (
8
8
"errors"
9
9
"fmt"
10
+ "strconv"
10
11
"strings"
11
12
12
13
"golang.org/x/net/html"
@@ -122,13 +123,62 @@ func parse(s string) (*selector, error) {
122
123
}
123
124
sel .next = next
124
125
return sel , nil
126
+ case s [0 ] == ':' :
127
+ atom , rest , err := parsePseudoClass (s )
128
+ if err != nil {
129
+ return nil , err
130
+ }
131
+ sel .atoms = append (sel .atoms , atom )
132
+ s = rest
125
133
default :
126
- return nil , fmt .Errorf ("unexpected character %q in input" , s [0 ])
134
+ return nil , fmt .Errorf ("unexpected character '%v' in input" , string ( s [0 ]) )
127
135
}
128
136
}
129
137
return sel , nil
130
138
}
131
139
140
+ // parsePseudoClass parses a :nth-child(n) or :nth-of-type(n). It only supports those functions and
141
+ // number arguments to those functions, not even, odd, or an+b.
142
+ func parsePseudoClass (s string ) (selectorAtom , string , error ) {
143
+ if s [0 ] != ':' {
144
+ return nil , "" , errors .New ("expected ':' at beginning of pseudo class" )
145
+ }
146
+ ident , rest := consumeIdentifier (s [1 :])
147
+ if len (ident ) == 0 {
148
+ return nil , "" , errors .New ("expected identifier after : in pseudo class" )
149
+ }
150
+ if ident != "nth-of-type" && ident != "nth-child" {
151
+ return nil , "" , errors .New ("only :nth-of-type() and :nth-child() pseudoclasses are supported" )
152
+ }
153
+ s = rest
154
+ if len (s ) == 0 || s [0 ] != '(' {
155
+ return nil , "" , errors .New ("expected '(' after :nth-of-type or nth-child" )
156
+ }
157
+ numstr , rest := consumeNumber (s [1 :])
158
+ if len (numstr ) == 0 {
159
+ return nil , "" , errors .New ("only number arguments are supported for :nth-of-type() or :nth-child()" )
160
+ }
161
+ num , err := strconv .Atoi (numstr )
162
+ if err != nil {
163
+ // This shouldn't ever happen because consumeNumber should only return valid numbers and we
164
+ // check that the length is greater than 0.
165
+ panic (fmt .Errorf ("unexpected parse error for number: %v" , err ))
166
+ }
167
+ s = rest
168
+ if len (s ) == 0 || s [0 ] != ')' {
169
+ return nil , "" , errors .New ("expected ')' after number argument to nth-of-type or nth-child" )
170
+ }
171
+ rest = s [1 :]
172
+ switch ident {
173
+ case "nth-of-type" :
174
+ return & nthOfType {num }, rest , nil
175
+ case "nth-child" :
176
+ return & nthChild {num }, rest , nil
177
+ default :
178
+ panic ("we should only have allowed nth-of-type or nth-child up to this point" )
179
+ }
180
+ }
181
+
132
182
// parseAttributeSelector parses an attribute selector of the form [attribute-name="attribute=value"]
133
183
func parseAttributeSelector (s string ) (* attributeSelector , string , error ) {
134
184
if s [0 ] != '[' {
@@ -192,6 +242,15 @@ func consumeIdentifier(s string) (letters, rest string) {
192
242
return s [:i ], s [i :]
193
243
}
194
244
245
+ // consumeNumber consumes and returns a (0-9)+ number at the beginning
246
+ // of the given string, and the rest of the string.
247
+ func consumeNumber (s string ) (letters , rest string ) {
248
+ i := 0
249
+ for ; i < len (s ) && isNumber (s [i ]); i ++ {
250
+ }
251
+ return s [:i ], s [i :]
252
+ }
253
+
195
254
func isAscii (s string ) bool {
196
255
for i := 0 ; i < len (s ); i ++ {
197
256
if s [i ] > 127 {
@@ -268,3 +327,65 @@ func (s *classSelector) match(n *html.Node) bool {
268
327
}
269
328
return false
270
329
}
330
+
331
+ // nthOfType implements the :nth-of-type() pseudoclass.
332
+ type nthOfType struct {
333
+ n int
334
+ }
335
+
336
+ func (s * nthOfType ) match (n * html.Node ) bool {
337
+ if n .Type != html .ElementNode || n .Parent == nil {
338
+ return false
339
+ }
340
+ curChild := n .Parent .FirstChild
341
+ i := 0
342
+ for {
343
+ if curChild .Type == html .ElementNode && curChild .Data == n .Data {
344
+ i ++
345
+ if i == s .n {
346
+ break
347
+ }
348
+ }
349
+ if curChild .NextSibling == nil {
350
+ break
351
+ }
352
+ curChild = curChild .NextSibling
353
+ }
354
+ if i != s .n {
355
+ // there were fewer than n children of this element type.
356
+ return false
357
+ }
358
+ return curChild == n
359
+ }
360
+
361
+ // nthChild implements the :nth-child() pseudoclass
362
+ type nthChild struct {
363
+ n int
364
+ }
365
+
366
+ func (s * nthChild ) match (n * html.Node ) bool {
367
+ if n .Type != html .ElementNode || n .Parent == nil {
368
+ return false
369
+ }
370
+ curChild := n .Parent .FirstChild
371
+ // Advance to next element node.
372
+ for curChild .Type != html .ElementNode && curChild .NextSibling != nil {
373
+ curChild = curChild .NextSibling
374
+ }
375
+ i := 1
376
+ for ; i < s .n ; i ++ {
377
+ if curChild .NextSibling == nil {
378
+ break
379
+ }
380
+ curChild = curChild .NextSibling
381
+ // Advance to next element node.
382
+ for curChild .Type != html .ElementNode && curChild .NextSibling != nil {
383
+ curChild = curChild .NextSibling
384
+ }
385
+ }
386
+ if i != s .n {
387
+ // there were fewer than n children.
388
+ return false
389
+ }
390
+ return curChild == n
391
+ }
0 commit comments