diff --git a/cssselect/parser.py b/cssselect/parser.py index a27ece5..4fbeb2f 100644 --- a/cssselect/parser.py +++ b/cssselect/parser.py @@ -238,12 +238,22 @@ class Negation(object): Represents selector:not(subselector) """ - def __init__(self, selector, subselector): + def __init__(self, selector, subselector, combinator=None, subselector2=None): self.selector = selector self.subselector = subselector + self.combinator = combinator + self.subselector2 = subselector2 def __repr__(self): - return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) + if self.combinator is None and self.subselector2 is None: + return "%s[%r:not(%r)]" % (self.__class__.__name__, self.selector, self.subselector) + return "%s[%r:not(%r %s %r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + self.combinator.value, + self.subselector2.parsed_tree, + ) def canonical(self): subsel = self.subselector.canonical() @@ -257,6 +267,41 @@ def specificity(self): return a1 + a2, b1 + b2, c1 + c2 +class Relation(object): + """ + Represents selector:has(subselector) + """ + + def __init__(self, selector, combinator, subselector): + self.selector = selector + self.combinator = combinator + self.subselector = subselector + + def __repr__(self): + return "%s[%r:has(%r)]" % ( + self.__class__.__name__, + self.selector, + self.subselector, + ) + + def canonical(self): + try: + subsel = self.subselector[0].canonical() + except TypeError: + subsel = self.subselector.canonical() + if len(subsel) > 1: + subsel = subsel.lstrip("*") + return "%s:has(%s)" % (self.selector.canonical(), subsel) + + def specificity(self): + a1, b1, c1 = self.selector.specificity() + try: + a2, b2, c2 = self.subselector[-1].specificity() + except TypeError: + a2, b2, c2 = self.subselector.specificity() + return a1 + a2, b1 + b2, c1 + c2 + + class Matching(object): """ Represents selector:is(selector_list) @@ -579,9 +624,15 @@ def parse_simple_selector(stream, inside_negation=False): "Got pseudo-element ::%s inside :not() at %s" % (argument_pseudo_element, next.pos) ) + combinator = arguments = None if next != ("DELIM", ")"): - raise SelectorSyntaxError("Expected ')', got %s" % (next,)) - result = Negation(result, argument) + stream.skip_whitespace() + combinator, arguments = parse_relative_selector(stream) + result = Negation(result, argument, combinator, arguments) + elif ident.lower() == "has": + combinator, arguments = parse_relative_selector(stream) + result = Relation(result, combinator, arguments) + elif ident.lower() in ("matches", "is"): selectors = parse_simple_selector_arguments(stream) result = Matching(result, selectors) @@ -607,6 +658,29 @@ def parse_arguments(stream): raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) +def parse_relative_selector(stream): + stream.skip_whitespace() + subselector = "" + next = stream.next() + + if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]: + combinator = next + stream.skip_whitespace() + next = stream.next() + else: + combinator = Token("DELIM", " ", pos=0) + + while 1: + if next.type in ("IDENT", "STRING", "NUMBER") or next in [("DELIM", "."), ("DELIM", "*")]: + subselector += next.value + elif next == ("DELIM", ")"): + result = parse(subselector) + return combinator, result[0] + else: + raise SelectorSyntaxError("Expected an argument, got %s" % (next,)) + next = stream.next() + + def parse_simple_selector_arguments(stream): arguments = [] while 1: diff --git a/cssselect/xpath.py b/cssselect/xpath.py index f80e629..47cb755 100644 --- a/cssselect/xpath.py +++ b/cssselect/xpath.py @@ -14,6 +14,7 @@ import sys import re +import copy from cssselect.parser import parse, parse_series, SelectorError @@ -75,13 +76,13 @@ def add_star_prefix(self): """ self.path += "*/" - def join(self, combiner, other): + def join(self, combiner, other, closing_combiner=None): path = _unicode(self) + combiner # Any "star prefix" is redundant when joining. if other.path != "*/": path += other.path self.path = path - self.element = other.element + self.element = other.element + closing_combiner if closing_combiner else other.element self.condition = other.condition return self @@ -269,12 +270,32 @@ def xpath_combinedselector(self, combined): def xpath_negation(self, negation): xpath = self.xpath(negation.selector) sub_xpath = self.xpath(negation.subselector) - sub_xpath.add_name_test() - if sub_xpath.condition: + if negation.combinator is not None and negation.subselector2 is not None: + sub2_xpath = self.xpath(negation.subselector2.parsed_tree) + method = getattr( + self, + "xpath_negation_%s_combinator" + % self.combinator_mapping[negation.combinator.value], + ) + return method(xpath, sub_xpath, sub2_xpath) + elif sub_xpath.condition: + sub_xpath.add_name_test() return xpath.add_condition("not(%s)" % sub_xpath.condition) else: + sub_xpath.add_name_test() return xpath.add_condition("0") + def xpath_relation(self, relation): + xpath = self.xpath(relation.selector) + combinator = relation.combinator + subselector = relation.subselector + right = self.xpath(subselector.parsed_tree) + method = getattr( + self, + "xpath_relation_%s_combinator" % self.combinator_mapping[combinator.value], + ) + return method(xpath, right) + def xpath_matching(self, matching): xpath = self.xpath(matching.selector) exprs = [self.xpath(selector) for selector in matching.selector_list] @@ -376,6 +397,46 @@ def xpath_indirect_adjacent_combinator(self, left, right): """right is a sibling after left, immediately or not""" return left.join("/following-sibling::", right) + def xpath_relation_descendant_combinator(self, left, right): + """right is a child, grand-child or further descendant of left; select left""" + return left.join("[descendant::", right, closing_combiner="]") + + def xpath_relation_child_combinator(self, left, right): + """right is an immediate child of left; select left""" + return left.join("[./", right, closing_combiner="]") + + def xpath_relation_direct_adjacent_combinator(self, left, right): + """right is a sibling immediately after left; select left""" + xpath = left.add_condition( + "following-sibling::*[(name() = '{}') and (position() = 1)]".format(right.element) + ) + return xpath + + def xpath_relation_indirect_adjacent_combinator(self, left, right): + """right is a sibling after left, immediately or not; select left""" + return left.join("[following-sibling::", right, closing_combiner="]") + + def xpath_negation_descendant_combinator(self, xpath, left, right): + xpath.add_condition('not(name()="%s" and ancestor::*[name()="%s"])' % (right, left)) + return xpath + + def xpath_negation_child_combinator(self, xpath, left, right): + xpath.add_condition('not(name()="%s" and parent::*[name()="%s"])' % (right, left)) + return xpath + + def xpath_negation_direct_adjacent_combinator(self, xpath, left, right): + xpath.add_condition( + 'not(name()="%s" and following-sibling::*[position()=1 and name()="%s"])' + % (right, left) + ) + return xpath + + def xpath_negation_indirect_adjacent_combinator(self, xpath, left, right): + xpath.add_condition( + 'not(name()="%s" and following-sibling::*[name()="%s"])' % (right, left) + ) + return xpath + # Function: dispatch by function/pseudo-class name def xpath_nth_child_function(self, xpath, function, last=False, add_name_test=True): diff --git a/tests/test_cssselect.py b/tests/test_cssselect.py index ba46d8a..5d28c3a 100644 --- a/tests/test_cssselect.py +++ b/tests/test_cssselect.py @@ -145,9 +145,16 @@ def parse_many(first, *others): assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"] assert parse_many('div:contains("foo")') == ["Function[Element[div]:contains(['foo'])]"] assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"] + assert parse_many(":not(a > b)") == ["Negation[Element[*]:not(Element[a] > Element[b])]"] + assert parse_many(":not(a + b)") == ["Negation[Element[*]:not(Element[a] + Element[b])]"] + assert parse_many(":not(a ~ b)") == ["Negation[Element[*]:not(Element[a] ~ Element[b])]"] + assert parse_many(":not(a b)") == ["Negation[Element[*]:not(Element[a] Element[b])]"] assert parse_many("div:not(div.foo)") == [ "Negation[Element[div]:not(Class[Element[div].foo])]" ] + assert parse_many("div:has(div.foo)") == [ + "Relation[Element[div]:has(Selector[Class[Element[div].foo]])]" + ] assert parse_many("div:is(.foo, #bar)") == [ "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]" ] @@ -279,6 +286,11 @@ def specificity(css): assert specificity(":not(:empty)") == (0, 1, 0) assert specificity(":not(#foo)") == (1, 0, 0) + assert specificity(":has(*)") == (0, 0, 0) + assert specificity(":has(foo)") == (0, 0, 1) + assert specificity(":has(.foo)") == (0, 1, 0) + assert specificity(":has(> foo)") == (0, 0, 1) + assert specificity(":is(.foo, #bar)") == (1, 0, 0) assert specificity(":is(:hover, :visited)") == (0, 1, 0) @@ -315,6 +327,9 @@ def css2css(css, res=None): css2css(":not(*[foo])", ":not([foo])") css2css(":not(:empty)") css2css(":not(#foo)") + css2css(":has(*)") + css2css(":has(foo)") + css2css(":has(*.foo)", ":has(.foo)") css2css(":is(#bar, .foo)") css2css(":is(:focused, :visited)") css2css("foo:empty") @@ -379,6 +394,10 @@ def get_error(css): ) assert get_error("> div p") == ("Expected selector, got ' at 0>") + # Unsupported :has() with several arguments + assert get_error(":has(a, b)") == ("Expected an argument, got ") + assert get_error(":has()") == ("Expected selector, got ") + def test_translation(self): def xpath(css): return _unicode(GenericTranslator().css_to_xpath(css, prefix="")) @@ -453,6 +472,24 @@ def xpath(css): assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]") assert xpath("e:root") == ("e[not(parent::*)]") assert xpath("e:hover") == ("e[0]") # never matches + assert xpath("*:not(a > b)") == ( + '*[not(name()="b" and parent::*[name()="a"])]' + ) # select anything that is not b or doesn't have a parent a + assert xpath("*:not(a + b)") == ( + '*[not(name()="b" and following-sibling::*[position()=1 and name()="a"])]' + ) # select anything that is not b or doesn't have an immediate sibling a + assert xpath("*:not(a ~ b)") == ( + '*[not(name()="b" and following-sibling::*[name()="a"])]' + ) # select anything that is not b or doesn't have a sibling a + assert xpath("*:not(a b)") == ( + '*[not(name()="b" and ancestor::*[name()="a"])]' + ) # select anything that is not b or doesn't have an ancestor a + assert xpath("e:has(> f)") == "e[./f]" + assert xpath("e:has(f)") == "e[descendant::f]" + assert xpath("e:has(~ f)") == "e[following-sibling::f]" + assert ( + xpath("e:has(+ f)") == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]" + ) assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]") assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]") assert xpath("e.warning") == ( @@ -863,6 +900,8 @@ def pcss(main, *selectors, **kwargs): "sixth-li", "seventh-li", ] + assert pcss("link:has(*)") == [] + assert pcss("ol:has(div)") == ["first-ol"] assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"] assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"] assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"]