cssselectpatch.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. # -*- coding:utf-8 -*-
  2. #
  3. # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com>
  4. #
  5. # Distributed under the BSD license, see LICENSE.txt
  6. from __future__ import unicode_literals
  7. from cssselect import xpath as cssselect_xpath
  8. from cssselect.xpath import ExpressionError
# Keep a reference to cssselect's own XPathExpr under a distinct name: this
# module defines a subclass that is also called XPathExpr and needs to reach
# the original implementation.
XPathExprOrig = cssselect_xpath.XPathExpr
  10. class XPathExpr(XPathExprOrig):
  11. def __init__(self, path='', element='*', condition='', star_prefix=False):
  12. self.path = path
  13. self.element = element
  14. self.condition = condition
  15. self.post_condition = None
  16. def add_post_condition(self, post_condition):
  17. if self.post_condition:
  18. self.post_condition = '%s and (%s)' % (self.post_condition,
  19. post_condition)
  20. else:
  21. self.post_condition = post_condition
  22. def __str__(self):
  23. path = XPathExprOrig.__str__(self)
  24. if self.post_condition:
  25. path = '%s[%s]' % (path, self.post_condition)
  26. return path
  27. def join(self, combiner, other):
  28. res = XPathExprOrig.join(self, combiner, other)
  29. self.post_condition = other.post_condition
  30. return res
  31. # keep cssselect < 0.8 compat for now
  32. class JQueryTranslator(cssselect_xpath.HTMLTranslator):
  33. """This class is used to implement the css pseudo classes
  34. (:first, :last, ...) that are not defined in the css standard,
  35. but are defined in the jquery API.
  36. """
  37. xpathexpr_cls = XPathExpr
  38. def xpath_first_pseudo(self, xpath):
  39. """Matches the first selected element::
  40. >>> from pyquery import PyQuery
  41. >>> d = PyQuery('<div><p class="first"></p><p></p></div>')
  42. >>> d('p:first')
  43. [<p.first>]
  44. ..
  45. """
  46. xpath.add_post_condition('position() = 1')
  47. return xpath
  48. def xpath_last_pseudo(self, xpath):
  49. """Matches the last selected element::
  50. >>> from pyquery import PyQuery
  51. >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
  52. >>> d('p:last')
  53. [<p.last>]
  54. ..
  55. """
  56. xpath.add_post_condition('position() = last()')
  57. return xpath
  58. def xpath_even_pseudo(self, xpath):
  59. """Matches even elements, zero-indexed::
  60. >>> from pyquery import PyQuery
  61. >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
  62. >>> d('p:even')
  63. [<p>]
  64. ..
  65. """
  66. # the first element is 1 in xpath and 0 in python and js
  67. xpath.add_post_condition('position() mod 2 = 1')
  68. return xpath
  69. def xpath_odd_pseudo(self, xpath):
  70. """Matches odd elements, zero-indexed::
  71. >>> from pyquery import PyQuery
  72. >>> d = PyQuery('<div><p></p><p class="last"></p></div>')
  73. >>> d('p:odd')
  74. [<p.last>]
  75. ..
  76. """
  77. xpath.add_post_condition('position() mod 2 = 0')
  78. return xpath
  79. def xpath_checked_pseudo(self, xpath):
  80. """Matches odd elements, zero-indexed::
  81. >>> from pyquery import PyQuery
  82. >>> d = PyQuery('<div><input checked="checked"/></div>')
  83. >>> d('input:checked')
  84. [<input>]
  85. ..
  86. """
  87. xpath.add_condition("@checked and name(.) = 'input'")
  88. return xpath
  89. def xpath_selected_pseudo(self, xpath):
  90. """Matches all elements that are selected::
  91. >>> from pyquery import PyQuery
  92. >>> d = PyQuery('<select><option selected="selected"/></select>')
  93. >>> d('option:selected')
  94. [<option>]
  95. ..
  96. """
  97. xpath.add_condition("@selected and name(.) = 'option'")
  98. return xpath
  99. def xpath_disabled_pseudo(self, xpath):
  100. """Matches all elements that are disabled::
  101. >>> from pyquery import PyQuery
  102. >>> d = PyQuery('<div><input disabled="disabled"/></div>')
  103. >>> d('input:disabled')
  104. [<input>]
  105. ..
  106. """
  107. xpath.add_condition("@disabled")
  108. return xpath
  109. def xpath_enabled_pseudo(self, xpath):
  110. """Matches all elements that are enabled::
  111. >>> from pyquery import PyQuery
  112. >>> d = PyQuery('<div><input value="foo" /></div>')
  113. >>> d('input:enabled')
  114. [<input>]
  115. ..
  116. """
  117. xpath.add_condition("not(@disabled) and name(.) = 'input'")
  118. return xpath
  119. def xpath_file_pseudo(self, xpath):
  120. """Matches all input elements of type file::
  121. >>> from pyquery import PyQuery
  122. >>> d = PyQuery('<div><input type="file"/></div>')
  123. >>> d('input:file')
  124. [<input>]
  125. ..
  126. """
  127. xpath.add_condition("@type = 'file' and name(.) = 'input'")
  128. return xpath
  129. def xpath_input_pseudo(self, xpath):
  130. """Matches all input elements::
  131. >>> from pyquery import PyQuery
  132. >>> d = PyQuery(('<div><input type="file"/>'
  133. ... '<textarea></textarea></div>'))
  134. >>> d(':input')
  135. [<input>, <textarea>]
  136. ..
  137. """
  138. xpath.add_condition((
  139. "(name(.) = 'input' or name(.) = 'select') "
  140. "or (name(.) = 'textarea' or name(.) = 'button')"))
  141. return xpath
  142. def xpath_button_pseudo(self, xpath):
  143. """Matches all button input elements and the button element::
  144. >>> from pyquery import PyQuery
  145. >>> d = PyQuery(('<div><input type="button"/>'
  146. ... '<button></button></div>'))
  147. >>> d(':button')
  148. [<input>, <button>]
  149. ..
  150. """
  151. xpath.add_condition((
  152. "(@type = 'button' and name(.) = 'input') "
  153. "or name(.) = 'button'"))
  154. return xpath
  155. def xpath_radio_pseudo(self, xpath):
  156. """Matches all radio input elements::
  157. >>> from pyquery import PyQuery
  158. >>> d = PyQuery('<div><input type="radio"/></div>')
  159. >>> d('input:radio')
  160. [<input>]
  161. ..
  162. """
  163. xpath.add_condition("@type = 'radio' and name(.) = 'input'")
  164. return xpath
  165. def xpath_text_pseudo(self, xpath):
  166. """Matches all text input elements::
  167. >>> from pyquery import PyQuery
  168. >>> d = PyQuery('<div><input type="text"/></div>')
  169. >>> d('input:text')
  170. [<input>]
  171. ..
  172. """
  173. xpath.add_condition("@type = 'text' and name(.) = 'input'")
  174. return xpath
  175. def xpath_checkbox_pseudo(self, xpath):
  176. """Matches all checkbox input elements::
  177. >>> from pyquery import PyQuery
  178. >>> d = PyQuery('<div><input type="checkbox"/></div>')
  179. >>> d('input:checkbox')
  180. [<input>]
  181. ..
  182. """
  183. xpath.add_condition("@type = 'checkbox' and name(.) = 'input'")
  184. return xpath
  185. def xpath_password_pseudo(self, xpath):
  186. """Matches all password input elements::
  187. >>> from pyquery import PyQuery
  188. >>> d = PyQuery('<div><input type="password"/></div>')
  189. >>> d('input:password')
  190. [<input>]
  191. ..
  192. """
  193. xpath.add_condition("@type = 'password' and name(.) = 'input'")
  194. return xpath
  195. def xpath_submit_pseudo(self, xpath):
  196. """Matches all submit input elements::
  197. >>> from pyquery import PyQuery
  198. >>> d = PyQuery('<div><input type="submit"/></div>')
  199. >>> d('input:submit')
  200. [<input>]
  201. ..
  202. """
  203. xpath.add_condition("@type = 'submit' and name(.) = 'input'")
  204. return xpath
  205. def xpath_hidden_pseudo(self, xpath):
  206. """Matches all hidden input elements::
  207. >>> from pyquery import PyQuery
  208. >>> d = PyQuery('<div><input type="hidden"/></div>')
  209. >>> d('input:hidden')
  210. [<input>]
  211. ..
  212. """
  213. xpath.add_condition("@type = 'hidden' and name(.) = 'input'")
  214. return xpath
  215. def xpath_image_pseudo(self, xpath):
  216. """Matches all image input elements::
  217. >>> from pyquery import PyQuery
  218. >>> d = PyQuery('<div><input type="image"/></div>')
  219. >>> d('input:image')
  220. [<input>]
  221. ..
  222. """
  223. xpath.add_condition("@type = 'image' and name(.) = 'input'")
  224. return xpath
  225. def xpath_reset_pseudo(self, xpath):
  226. """Matches all reset input elements::
  227. >>> from pyquery import PyQuery
  228. >>> d = PyQuery('<div><input type="reset"/></div>')
  229. >>> d('input:reset')
  230. [<input>]
  231. ..
  232. """
  233. xpath.add_condition("@type = 'reset' and name(.) = 'input'")
  234. return xpath
  235. def xpath_header_pseudo(self, xpath):
  236. """Matches all header elelements (h1, ..., h6)::
  237. >>> from pyquery import PyQuery
  238. >>> d = PyQuery('<div><h1>title</h1></div>')
  239. >>> d(':header')
  240. [<h1>]
  241. ..
  242. """
  243. # this seems kind of brute-force, is there a better way?
  244. xpath.add_condition((
  245. "(name(.) = 'h1' or name(.) = 'h2' or name (.) = 'h3') "
  246. "or (name(.) = 'h4' or name (.) = 'h5' or name(.) = 'h6')"))
  247. return xpath
  248. def xpath_parent_pseudo(self, xpath):
  249. """Match all elements that contain other elements::
  250. >>> from pyquery import PyQuery
  251. >>> d = PyQuery('<div><h1><span>title</span></h1><h1/></div>')
  252. >>> d('h1:parent')
  253. [<h1>]
  254. ..
  255. """
  256. xpath.add_condition("count(child::*) > 0")
  257. return xpath
  258. def xpath_empty_pseudo(self, xpath):
  259. """Match all elements that do not contain other elements::
  260. >>> from pyquery import PyQuery
  261. >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>')
  262. >>> d(':empty')
  263. [<h2>]
  264. ..
  265. """
  266. xpath.add_condition("not(node())")
  267. return xpath
  268. def xpath_eq_function(self, xpath, function):
  269. """Matches a single element by its index::
  270. >>> from pyquery import PyQuery
  271. >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
  272. >>> d('h1:eq(0)')
  273. [<h1.first>]
  274. >>> d('h1:eq(1)')
  275. [<h1.last>]
  276. ..
  277. """
  278. if function.argument_types() != ['NUMBER']:
  279. raise ExpressionError(
  280. "Expected a single integer for :eq(), got %r" % (
  281. function.arguments,))
  282. value = int(function.arguments[0].value)
  283. xpath.add_post_condition('position() = %s' % (value + 1))
  284. return xpath
  285. def xpath_gt_function(self, xpath, function):
  286. """Matches all elements with an index over the given one::
  287. >>> from pyquery import PyQuery
  288. >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
  289. >>> d('h1:gt(0)')
  290. [<h1.last>]
  291. ..
  292. """
  293. if function.argument_types() != ['NUMBER']:
  294. raise ExpressionError(
  295. "Expected a single integer for :gt(), got %r" % (
  296. function.arguments,))
  297. value = int(function.arguments[0].value)
  298. xpath.add_post_condition('position() > %s' % (value + 1))
  299. return xpath
  300. def xpath_lt_function(self, xpath, function):
  301. """Matches all elements with an index below the given one::
  302. >>> from pyquery import PyQuery
  303. >>> d = PyQuery('<div><h1 class="first"/><h1 class="last"/></div>')
  304. >>> d('h1:lt(1)')
  305. [<h1.first>]
  306. ..
  307. """
  308. if function.argument_types() != ['NUMBER']:
  309. raise ExpressionError(
  310. "Expected a single integer for :gt(), got %r" % (
  311. function.arguments,))
  312. value = int(function.arguments[0].value)
  313. xpath.add_post_condition('position() < %s' % (value + 1))
  314. return xpath
  315. def xpath_contains_function(self, xpath, function):
  316. """Matches all elements that contain the given text
  317. >>> from pyquery import PyQuery
  318. >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>')
  319. >>> d('h1:contains("title")')
  320. [<h1.title>]
  321. ..
  322. """
  323. if function.argument_types() not in (['STRING'], ['IDENT']):
  324. raise ExpressionError(
  325. "Expected a single string or ident for :contains(), got %r" % (
  326. function.arguments,))
  327. value = self.xpath_literal(function.arguments[0].value)
  328. xpath.add_post_condition('contains(., %s)' % value)
  329. return xpath
  330. def xpath_has_function(self, xpath, function):
  331. """Matches elements which contain at least one element that matches
  332. the specified selector. https://api.jquery.com/has-selector/
  333. >>> from pyquery import PyQuery
  334. >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>')
  335. >>> d('.foo:has(".baz")')
  336. []
  337. >>> d('.foo:has(".foo")')
  338. []
  339. >>> d('.foo:has(".bar")')
  340. [<div.foo>]
  341. >>> d('.foo:has(div)')
  342. [<div.foo>]
  343. ..
  344. """
  345. if function.argument_types() not in (['STRING'], ['IDENT']):
  346. raise ExpressionError(
  347. "Expected a single string or ident for :has(), got %r" % (
  348. function.arguments,))
  349. value = self.css_to_xpath(
  350. function.arguments[0].value, prefix='descendant::',
  351. )
  352. xpath.add_post_condition(value)
  353. return xpath