123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448 |
- # -*- coding:utf-8 -*-
- #
- # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
- #
- # Distributed under the BSD license, see LICENSE.txt
- from __future__ import unicode_literals
- from cssselect import xpath as cssselect_xpath
- from cssselect.xpath import ExpressionError
# Keep a handle on cssselect's own XPathExpr class: the XPathExpr defined
# below reuses that name and subclasses this original.
- XPathExprOrig = cssselect_xpath.XPathExpr
class XPathExpr(XPathExprOrig):
    """XPath expression that can carry an extra trailing predicate.

    In addition to the ``path``, ``element`` and ``condition`` parts taken
    by the constructor, an instance keeps a ``post_condition``.  When the
    expression is rendered with ``str()``, the post-condition (if any) is
    appended as a final ``[...]`` predicate after the base class rendering.
    """

    def __init__(self, path='', element='*', condition='', star_prefix=False):
        """Create an expression without any post-condition.

        The arguments are forwarded unchanged to the cssselect base class
        so that it initialises its own attributes itself instead of having
        them duplicated here.
        """
        XPathExprOrig.__init__(self, path, element, condition, star_prefix)
        # No trailing predicate until add_post_condition() is called.
        self.post_condition = None

    def add_post_condition(self, post_condition):
        """AND ``post_condition`` onto the trailing predicate.

        The first post-condition is stored verbatim; every later one is
        wrapped in parentheses and combined with the existing one using
        ``and``.
        """
        if self.post_condition:
            self.post_condition = '%s and (%s)' % (self.post_condition,
                                                   post_condition)
        else:
            self.post_condition = post_condition

    def __str__(self):
        """Render the base expression followed by ``[post_condition]``."""
        path = XPathExprOrig.__str__(self)
        if self.post_condition:
            path = '%s[%s]' % (path, self.post_condition)
        return path

    def join(self, combiner, other):
        """Join ``other`` onto this expression and adopt its post-condition.

        Returns whatever the base class ``join`` returns.
        """
        res = XPathExprOrig.join(self, combiner, other)
        # NOTE(review): this presumably relies on the base join() having
        # already rendered our own post-condition into the joined path --
        # confirm against the cssselect implementation.
        # ``other`` may be an expression that never had a post-condition
        # attribute (e.g. a plain cssselect XPathExpr); treat that as "none".
        self.post_condition = getattr(other, 'post_condition', None)
        return res
- # keep cssselect < 0.8 compat for now
class JQueryTranslator(cssselect_xpath.HTMLTranslator):
    """This class is used to implement the css pseudo classes
    (:first, :last, ...) that are not defined in the css standard,
    but are defined in the jquery API.

    Every ``xpath_*_pseudo`` / ``xpath_*_function`` method receives the
    XPath expression built so far, adds one condition to it and returns
    that same expression object.
    """

    # Expression class used by the translator; it adds post-conditions.
    xpathexpr_cls = XPathExpr

    def _integer_argument(self, function, name):
        """Return the single integer argument of the ``:name()`` function.

        ``function`` is the parsed selector function and ``name`` is the
        jQuery function name used in the error message.

        Raises ``ExpressionError`` when the function was not given exactly
        one NUMBER argument, or when that number is not a valid integer
        literal.
        """
        if function.argument_types() == ['NUMBER']:
            try:
                return int(function.arguments[0].value)
            except ValueError:
                # The NUMBER token is not an integer literal; report it
                # below with the same error as any other bad argument.
                pass
        raise ExpressionError(
            "Expected a single integer for :%s(), got %r" % (
                name, function.arguments))

    def xpath_first_pseudo(self, xpath):
        """Matches the first selected element::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><p class="first"></p><p></p></div>')
            >>> d('p:first')
            [<p.first>]

        ..
        """
        xpath.add_post_condition('position() = 1')
        return xpath

    def xpath_last_pseudo(self, xpath):
        """Matches the last selected element::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
            >>> d('p:last')
            [<p.last>]

        ..
        """
        xpath.add_post_condition('position() = last()')
        return xpath

    def xpath_even_pseudo(self, xpath):
        """Matches even elements, zero-indexed::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
            >>> d('p:even')
            [<p>]

        ..
        """
        # the first element is 1 in xpath and 0 in python and js
        xpath.add_post_condition('position() mod 2 = 1')
        return xpath

    def xpath_odd_pseudo(self, xpath):
        """Matches odd elements, zero-indexed::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
            >>> d('p:odd')
            [<p.last>]

        ..
        """
        # zero-indexed odd elements sit at even (1-based) xpath positions
        xpath.add_post_condition('position() mod 2 = 0')
        return xpath

    def xpath_checked_pseudo(self, xpath):
        """Matches all input elements that are checked::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input checked="checked"/></div>')
            >>> d('input:checked')
            [<input>]

        ..
        """
        xpath.add_condition("@checked and name(.) = 'input'")
        return xpath

    def xpath_selected_pseudo(self, xpath):
        """Matches all option elements that are selected::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<select><option selected="selected"/></select>')
            >>> d('option:selected')
            [<option>]

        ..
        """
        xpath.add_condition("@selected and name(.) = 'option'")
        return xpath

    def xpath_disabled_pseudo(self, xpath):
        """Matches all elements that are disabled::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input disabled="disabled"/></div>')
            >>> d('input:disabled')
            [<input>]

        ..
        """
        xpath.add_condition("@disabled")
        return xpath

    def xpath_enabled_pseudo(self, xpath):
        """Matches all input elements that are enabled::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input value="foo" /></div>')
            >>> d('input:enabled')
            [<input>]

        ..
        """
        xpath.add_condition("not(@disabled) and name(.) = 'input'")
        return xpath

    def xpath_file_pseudo(self, xpath):
        """Matches all input elements of type file::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="file"/></div>')
            >>> d('input:file')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'file' and name(.) = 'input'")
        return xpath

    def xpath_input_pseudo(self, xpath):
        """Matches all input, select, textarea and button elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery(('<div><input type="file"/>'
            ...              '<textarea></textarea></div>'))
            >>> d(':input')
            [<input>, <textarea>]

        ..
        """
        xpath.add_condition((
            "(name(.) = 'input' or name(.) = 'select') "
            "or (name(.) = 'textarea' or name(.) = 'button')"))
        return xpath

    def xpath_button_pseudo(self, xpath):
        """Matches all button input elements and the button element::

            >>> from pyquery import PyQuery
            >>> d = PyQuery(('<div><input type="button"/>'
            ...              '<button></button></div>'))
            >>> d(':button')
            [<input>, <button>]

        ..
        """
        xpath.add_condition((
            "(@type = 'button' and name(.) = 'input') "
            "or name(.) = 'button'"))
        return xpath

    def xpath_radio_pseudo(self, xpath):
        """Matches all radio input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="radio"/></div>')
            >>> d('input:radio')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'radio' and name(.) = 'input'")
        return xpath

    def xpath_text_pseudo(self, xpath):
        """Matches all text input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="text"/></div>')
            >>> d('input:text')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'text' and name(.) = 'input'")
        return xpath

    def xpath_checkbox_pseudo(self, xpath):
        """Matches all checkbox input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="checkbox"/></div>')
            >>> d('input:checkbox')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'checkbox' and name(.) = 'input'")
        return xpath

    def xpath_password_pseudo(self, xpath):
        """Matches all password input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="password"/></div>')
            >>> d('input:password')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'password' and name(.) = 'input'")
        return xpath

    def xpath_submit_pseudo(self, xpath):
        """Matches all submit input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="submit"/></div>')
            >>> d('input:submit')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'submit' and name(.) = 'input'")
        return xpath

    def xpath_hidden_pseudo(self, xpath):
        """Matches all hidden input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="hidden"/></div>')
            >>> d('input:hidden')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'hidden' and name(.) = 'input'")
        return xpath

    def xpath_image_pseudo(self, xpath):
        """Matches all image input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="image"/></div>')
            >>> d('input:image')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'image' and name(.) = 'input'")
        return xpath

    def xpath_reset_pseudo(self, xpath):
        """Matches all reset input elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><input type="reset"/></div>')
            >>> d('input:reset')
            [<input>]

        ..
        """
        xpath.add_condition("@type = 'reset' and name(.) = 'input'")
        return xpath

    def xpath_header_pseudo(self, xpath):
        """Matches all header elements (h1, ..., h6)::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1>title</h1></div>')
            >>> d(':header')
            [<h1>]

        ..
        """
        # this seems kind of brute-force, is there a better way?
        xpath.add_condition((
            "(name(.) = 'h1' or name(.) = 'h2' or name (.) = 'h3') "
            "or (name(.) = 'h4' or name (.) = 'h5' or name(.) = 'h6')"))
        return xpath

    def xpath_parent_pseudo(self, xpath):
        """Match all elements that contain other elements::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1><span>title</span></h1><h1/></div>')
            >>> d('h1:parent')
            [<h1>]

        ..
        """
        xpath.add_condition("count(child::*) > 0")
        return xpath

    def xpath_empty_pseudo(self, xpath):
        """Match all elements that have no child node at all::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>')
            >>> d(':empty')
            [<h2>]

        ..
        """
        xpath.add_condition("not(node())")
        return xpath

    def xpath_eq_function(self, xpath, function):
        """Matches a single element by its index::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
            >>> d('h1:eq(0)')
            [<h1.first>]
            >>> d('h1:eq(1)')
            [<h1.last>]

        Raises ``ExpressionError`` unless the argument is a single integer.

        ..
        """
        value = self._integer_argument(function, 'eq')
        # indexes are zero-based, xpath positions are one-based
        xpath.add_post_condition('position() = %s' % (value + 1))
        return xpath

    def xpath_gt_function(self, xpath, function):
        """Matches all elements with an index over the given one::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
            >>> d('h1:gt(0)')
            [<h1.last>]

        Raises ``ExpressionError`` unless the argument is a single integer.

        ..
        """
        value = self._integer_argument(function, 'gt')
        # indexes are zero-based, xpath positions are one-based
        xpath.add_post_condition('position() > %s' % (value + 1))
        return xpath

    def xpath_lt_function(self, xpath, function):
        """Matches all elements with an index below the given one::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
            >>> d('h1:lt(1)')
            [<h1.first>]

        Raises ``ExpressionError`` unless the argument is a single integer.

        ..
        """
        value = self._integer_argument(function, 'lt')
        # indexes are zero-based, xpath positions are one-based
        xpath.add_post_condition('position() < %s' % (value + 1))
        return xpath

    def xpath_contains_function(self, xpath, function):
        """Matches all elements that contain the given text::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>')
            >>> d('h1:contains("title")')
            [<h1.title>]

        Raises ``ExpressionError`` unless the argument is a single string
        or identifier.

        ..
        """
        if function.argument_types() not in (['STRING'], ['IDENT']):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r" % (
                    function.arguments,))
        # quote the searched text as an xpath string literal
        value = self.xpath_literal(function.arguments[0].value)
        xpath.add_post_condition('contains(., %s)' % value)
        return xpath

    def xpath_has_function(self, xpath, function):
        """Matches elements which contain at least one element that matches
        the specified selector (https://api.jquery.com/has-selector/)::

            >>> from pyquery import PyQuery
            >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>')
            >>> d('.foo:has(".baz")')
            []
            >>> d('.foo:has(".foo")')
            []
            >>> d('.foo:has(".bar")')
            [<div.foo>]
            >>> d('.foo:has(div)')
            [<div.foo>]

        Raises ``ExpressionError`` unless the argument is a single string
        or identifier.

        ..
        """
        if function.argument_types() not in (['STRING'], ['IDENT']):
            raise ExpressionError(
                "Expected a single string or ident for :has(), got %r" % (
                    function.arguments,))
        # translate the inner selector with a descendant:: prefix and use the
        # resulting path as the post-condition
        value = self.css_to_xpath(
            function.arguments[0].value, prefix='descendant::',
        )
        xpath.add_post_condition(value)
        return xpath
|