parser.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. # -*- coding: utf-8 -*-
  2. """
  3. cssselect.parser
  4. ================
  5. Tokenizer, parser and parsed objects for CSS selectors.
  6. :copyright: (c) 2007-2012 Ian Bicking and contributors.
  7. See AUTHORS for more details.
  8. :license: BSD, see LICENSE for more details.
  9. """
  10. import sys
  11. import re
  12. import operator
  13. if sys.version_info[0] < 3:
  14. _unicode = unicode
  15. _unichr = unichr
  16. else:
  17. _unicode = str
  18. _unichr = chr
  19. def ascii_lower(string):
  20. """Lower-case, but only in the ASCII range."""
  21. return string.encode('utf8').lower().decode('utf8')
  22. class SelectorError(Exception):
  23. """Common parent for :class:`SelectorSyntaxError` and
  24. :class:`ExpressionError`.
  25. You can just use ``except SelectorError:`` when calling
  26. :meth:`~GenericTranslator.css_to_xpath` and handle both exceptions types.
  27. """
  28. class SelectorSyntaxError(SelectorError, SyntaxError):
  29. """Parsing a selector that does not match the grammar."""
  30. #### Parsed objects
  31. class Selector(object):
  32. """
  33. Represents a parsed selector.
  34. :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
  35. but ignores :attr:`pseudo_element`. It is the user’s responsibility
  36. to account for pseudo-elements and reject selectors with unknown
  37. or unsupported pseudo-elements.
  38. """
  39. def __init__(self, tree, pseudo_element=None):
  40. self.parsed_tree = tree
  41. if pseudo_element is not None and not isinstance(
  42. pseudo_element, FunctionalPseudoElement):
  43. pseudo_element = ascii_lower(pseudo_element)
  44. #: A :class:`FunctionalPseudoElement`,
  45. #: or the identifier for the pseudo-element as a string,
  46. # or ``None``.
  47. #:
  48. #: +-------------------------+----------------+--------------------------------+
  49. #: | | Selector | Pseudo-element |
  50. #: +=========================+================+================================+
  51. #: | CSS3 syntax | ``a::before`` | ``'before'`` |
  52. #: +-------------------------+----------------+--------------------------------+
  53. #: | Older syntax | ``a:before`` | ``'before'`` |
  54. #: +-------------------------+----------------+--------------------------------+
  55. #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'`` |
  56. #: | not in Selectors3 | | |
  57. #: +-------------------------+----------------+--------------------------------+
  58. #: | Invalid pseudo-class | ``li:marker`` | ``None`` |
  59. #: +-------------------------+----------------+--------------------------------+
  60. #: | Functinal | ``a::foo(2)`` | ``FunctionalPseudoElement(…)`` |
  61. #: +-------------------------+----------------+--------------------------------+
  62. #:
  63. #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
  64. self.pseudo_element = pseudo_element
  65. def __repr__(self):
  66. if isinstance(self.pseudo_element, FunctionalPseudoElement):
  67. pseudo_element = repr(self.pseudo_element)
  68. elif self.pseudo_element:
  69. pseudo_element = '::%s' % self.pseudo_element
  70. else:
  71. pseudo_element = ''
  72. return '%s[%r%s]' % (
  73. self.__class__.__name__, self.parsed_tree, pseudo_element)
  74. def specificity(self):
  75. """Return the specificity_ of this selector as a tuple of 3 integers.
  76. .. _specificity: http://www.w3.org/TR/selectors/#specificity
  77. """
  78. a, b, c = self.parsed_tree.specificity()
  79. if self.pseudo_element:
  80. c += 1
  81. return a, b, c
  82. class Class(object):
  83. """
  84. Represents selector.class_name
  85. """
  86. def __init__(self, selector, class_name):
  87. self.selector = selector
  88. self.class_name = class_name
  89. def __repr__(self):
  90. return '%s[%r.%s]' % (
  91. self.__class__.__name__, self.selector, self.class_name)
  92. def specificity(self):
  93. a, b, c = self.selector.specificity()
  94. b += 1
  95. return a, b, c
  96. class FunctionalPseudoElement(object):
  97. """
  98. Represents selector::name(arguments)
  99. .. attribute:: name
  100. The name (identifier) of the pseudo-element, as a string.
  101. .. attribute:: arguments
  102. The arguments of the pseudo-element, as a list of tokens.
  103. **Note:** tokens are not part of the public API,
  104. and may change between cssselect versions.
  105. Use at your own risks.
  106. """
  107. def __init__(self, name, arguments):
  108. self.name = ascii_lower(name)
  109. self.arguments = arguments
  110. def __repr__(self):
  111. return '%s[::%s(%r)]' % (
  112. self.__class__.__name__, self.name,
  113. [token.value for token in self.arguments])
  114. def argument_types(self):
  115. return [token.type for token in self.arguments]
  116. def specificity(self):
  117. a, b, c = self.selector.specificity()
  118. b += 1
  119. return a, b, c
  120. class Function(object):
  121. """
  122. Represents selector:name(expr)
  123. """
  124. def __init__(self, selector, name, arguments):
  125. self.selector = selector
  126. self.name = ascii_lower(name)
  127. self.arguments = arguments
  128. def __repr__(self):
  129. return '%s[%r:%s(%r)]' % (
  130. self.__class__.__name__, self.selector, self.name,
  131. [token.value for token in self.arguments])
  132. def argument_types(self):
  133. return [token.type for token in self.arguments]
  134. def specificity(self):
  135. a, b, c = self.selector.specificity()
  136. b += 1
  137. return a, b, c
  138. class Pseudo(object):
  139. """
  140. Represents selector:ident
  141. """
  142. def __init__(self, selector, ident):
  143. self.selector = selector
  144. self.ident = ascii_lower(ident)
  145. def __repr__(self):
  146. return '%s[%r:%s]' % (
  147. self.__class__.__name__, self.selector, self.ident)
  148. def specificity(self):
  149. a, b, c = self.selector.specificity()
  150. b += 1
  151. return a, b, c
  152. class Negation(object):
  153. """
  154. Represents selector:not(subselector)
  155. """
  156. def __init__(self, selector, subselector):
  157. self.selector = selector
  158. self.subselector = subselector
  159. def __repr__(self):
  160. return '%s[%r:not(%r)]' % (
  161. self.__class__.__name__, self.selector, self.subselector)
  162. def specificity(self):
  163. a1, b1, c1 = self.selector.specificity()
  164. a2, b2, c2 = self.subselector.specificity()
  165. return a1 + a2, b1 + b2, c1 + c2
  166. class Attrib(object):
  167. """
  168. Represents selector[namespace|attrib operator value]
  169. """
  170. def __init__(self, selector, namespace, attrib, operator, value):
  171. self.selector = selector
  172. self.namespace = namespace
  173. self.attrib = attrib
  174. self.operator = operator
  175. self.value = value
  176. def __repr__(self):
  177. if self.namespace:
  178. attrib = '%s|%s' % (self.namespace, self.attrib)
  179. else:
  180. attrib = self.attrib
  181. if self.operator == 'exists':
  182. return '%s[%r[%s]]' % (
  183. self.__class__.__name__, self.selector, attrib)
  184. else:
  185. return '%s[%r[%s %s %r]]' % (
  186. self.__class__.__name__, self.selector, attrib,
  187. self.operator, self.value)
  188. def specificity(self):
  189. a, b, c = self.selector.specificity()
  190. b += 1
  191. return a, b, c
  192. class Element(object):
  193. """
  194. Represents namespace|element
  195. `None` is for the universal selector '*'
  196. """
  197. def __init__(self, namespace=None, element=None):
  198. self.namespace = namespace
  199. self.element = element
  200. def __repr__(self):
  201. element = self.element or '*'
  202. if self.namespace:
  203. element = '%s|%s' % (self.namespace, element)
  204. return '%s[%s]' % (self.__class__.__name__, element)
  205. def specificity(self):
  206. if self.element:
  207. return 0, 0, 1
  208. else:
  209. return 0, 0, 0
  210. class Hash(object):
  211. """
  212. Represents selector#id
  213. """
  214. def __init__(self, selector, id):
  215. self.selector = selector
  216. self.id = id
  217. def __repr__(self):
  218. return '%s[%r#%s]' % (
  219. self.__class__.__name__, self.selector, self.id)
  220. def specificity(self):
  221. a, b, c = self.selector.specificity()
  222. a += 1
  223. return a, b, c
  224. class CombinedSelector(object):
  225. def __init__(self, selector, combinator, subselector):
  226. assert selector is not None
  227. self.selector = selector
  228. self.combinator = combinator
  229. self.subselector = subselector
  230. def __repr__(self):
  231. if self.combinator == ' ':
  232. comb = '<followed>'
  233. else:
  234. comb = self.combinator
  235. return '%s[%r %s %r]' % (
  236. self.__class__.__name__, self.selector, comb, self.subselector)
  237. def specificity(self):
  238. a1, b1, c1 = self.selector.specificity()
  239. a2, b2, c2 = self.subselector.specificity()
  240. return a1 + a2, b1 + b2, c1 + c2
  241. #### Parser
  242. # foo
  243. _el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
  244. # foo#bar or #bar
  245. _id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
  246. # foo.bar or .bar
  247. _class_re = re.compile(
  248. r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
  249. def parse(css):
  250. """Parse a CSS *group of selectors*.
  251. If you don't care about pseudo-elements or selector specificity,
  252. you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.
  253. :param css:
  254. A *group of selectors* as an Unicode string.
  255. :raises:
  256. :class:`SelectorSyntaxError` on invalid selectors.
  257. :returns:
  258. A list of parsed :class:`Selector` objects, one for each
  259. selector in the comma-separated group.
  260. """
  261. # Fast path for simple cases
  262. match = _el_re.match(css)
  263. if match:
  264. return [Selector(Element(element=match.group(1)))]
  265. match = _id_re.match(css)
  266. if match is not None:
  267. return [Selector(Hash(Element(element=match.group(1) or None),
  268. match.group(2)))]
  269. match = _class_re.match(css)
  270. if match is not None:
  271. return [Selector(Class(Element(element=match.group(1) or None),
  272. match.group(2)))]
  273. stream = TokenStream(tokenize(css))
  274. stream.source = css
  275. return list(parse_selector_group(stream))
  276. # except SelectorSyntaxError:
  277. # e = sys.exc_info()[1]
  278. # message = "%s at %s -> %r" % (
  279. # e, stream.used, stream.peek())
  280. # e.msg = message
  281. # if sys.version_info < (2,6):
  282. # e.message = message
  283. # e.args = tuple([message])
  284. # raise
  285. def parse_selector_group(stream):
  286. stream.skip_whitespace()
  287. while 1:
  288. yield Selector(*parse_selector(stream))
  289. if stream.peek() == ('DELIM', ','):
  290. stream.next()
  291. stream.skip_whitespace()
  292. else:
  293. break
  294. def parse_selector(stream):
  295. result, pseudo_element = parse_simple_selector(stream)
  296. while 1:
  297. stream.skip_whitespace()
  298. peek = stream.peek()
  299. if peek in (('EOF', None), ('DELIM', ',')):
  300. break
  301. if pseudo_element:
  302. raise SelectorSyntaxError(
  303. 'Got pseudo-element ::%s not at the end of a selector'
  304. % pseudo_element)
  305. if peek.is_delim('+', '>', '~'):
  306. # A combinator
  307. combinator = stream.next().value
  308. stream.skip_whitespace()
  309. else:
  310. # By exclusion, the last parse_simple_selector() ended
  311. # at peek == ' '
  312. combinator = ' '
  313. next_selector, pseudo_element = parse_simple_selector(stream)
  314. result = CombinedSelector(result, combinator, next_selector)
  315. return result, pseudo_element
  316. def parse_simple_selector(stream, inside_negation=False):
  317. stream.skip_whitespace()
  318. selector_start = len(stream.used)
  319. peek = stream.peek()
  320. if peek.type == 'IDENT' or peek == ('DELIM', '*'):
  321. if peek.type == 'IDENT':
  322. namespace = stream.next().value
  323. else:
  324. stream.next()
  325. namespace = None
  326. if stream.peek() == ('DELIM', '|'):
  327. stream.next()
  328. element = stream.next_ident_or_star()
  329. else:
  330. element = namespace
  331. namespace = None
  332. else:
  333. element = namespace = None
  334. result = Element(namespace, element)
  335. pseudo_element = None
  336. while 1:
  337. peek = stream.peek()
  338. if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or (
  339. inside_negation and peek == ('DELIM', ')')):
  340. break
  341. if pseudo_element:
  342. raise SelectorSyntaxError(
  343. 'Got pseudo-element ::%s not at the end of a selector'
  344. % pseudo_element)
  345. if peek.type == 'HASH':
  346. result = Hash(result, stream.next().value)
  347. elif peek == ('DELIM', '.'):
  348. stream.next()
  349. result = Class(result, stream.next_ident())
  350. elif peek == ('DELIM', '['):
  351. stream.next()
  352. result = parse_attrib(result, stream)
  353. elif peek == ('DELIM', ':'):
  354. stream.next()
  355. if stream.peek() == ('DELIM', ':'):
  356. stream.next()
  357. pseudo_element = stream.next_ident()
  358. if stream.peek() == ('DELIM', '('):
  359. stream.next()
  360. pseudo_element = FunctionalPseudoElement(
  361. pseudo_element, parse_arguments(stream))
  362. continue
  363. ident = stream.next_ident()
  364. if ident.lower() in ('first-line', 'first-letter',
  365. 'before', 'after'):
  366. # Special case: CSS 2.1 pseudo-elements can have a single ':'
  367. # Any new pseudo-element must have two.
  368. pseudo_element = _unicode(ident)
  369. continue
  370. if stream.peek() != ('DELIM', '('):
  371. result = Pseudo(result, ident)
  372. continue
  373. stream.next()
  374. stream.skip_whitespace()
  375. if ident.lower() == 'not':
  376. if inside_negation:
  377. raise SelectorSyntaxError('Got nested :not()')
  378. argument, argument_pseudo_element = parse_simple_selector(
  379. stream, inside_negation=True)
  380. next = stream.next()
  381. if argument_pseudo_element:
  382. raise SelectorSyntaxError(
  383. 'Got pseudo-element ::%s inside :not() at %s'
  384. % (argument_pseudo_element, next.pos))
  385. if next != ('DELIM', ')'):
  386. raise SelectorSyntaxError("Expected ')', got %s" % (next,))
  387. result = Negation(result, argument)
  388. else:
  389. result = Function(result, ident, parse_arguments(stream))
  390. else:
  391. raise SelectorSyntaxError(
  392. "Expected selector, got %s" % (peek,))
  393. if len(stream.used) == selector_start:
  394. raise SelectorSyntaxError(
  395. "Expected selector, got %s" % (stream.peek(),))
  396. return result, pseudo_element
  397. def parse_arguments(stream):
  398. arguments = []
  399. while 1:
  400. stream.skip_whitespace()
  401. next = stream.next()
  402. if next.type in ('IDENT', 'STRING', 'NUMBER') or next in [
  403. ('DELIM', '+'), ('DELIM', '-')]:
  404. arguments.append(next)
  405. elif next == ('DELIM', ')'):
  406. return arguments
  407. else:
  408. raise SelectorSyntaxError(
  409. "Expected an argument, got %s" % (next,))
  410. def parse_attrib(selector, stream):
  411. stream.skip_whitespace()
  412. attrib = stream.next_ident_or_star()
  413. if attrib is None and stream.peek() != ('DELIM', '|'):
  414. raise SelectorSyntaxError(
  415. "Expected '|', got %s" % (stream.peek(),))
  416. if stream.peek() == ('DELIM', '|'):
  417. stream.next()
  418. if stream.peek() == ('DELIM', '='):
  419. namespace = None
  420. stream.next()
  421. op = '|='
  422. else:
  423. namespace = attrib
  424. attrib = stream.next_ident()
  425. op = None
  426. else:
  427. namespace = op = None
  428. if op is None:
  429. stream.skip_whitespace()
  430. next = stream.next()
  431. if next == ('DELIM', ']'):
  432. return Attrib(selector, namespace, attrib, 'exists', None)
  433. elif next == ('DELIM', '='):
  434. op = '='
  435. elif next.is_delim('^', '$', '*', '~', '|', '!') and (
  436. stream.peek() == ('DELIM', '=')):
  437. op = next.value + '='
  438. stream.next()
  439. else:
  440. raise SelectorSyntaxError(
  441. "Operator expected, got %s" % (next,))
  442. stream.skip_whitespace()
  443. value = stream.next()
  444. if value.type not in ('IDENT', 'STRING'):
  445. raise SelectorSyntaxError(
  446. "Expected string or ident, got %s" % (value,))
  447. stream.skip_whitespace()
  448. next = stream.next()
  449. if next != ('DELIM', ']'):
  450. raise SelectorSyntaxError(
  451. "Expected ']', got %s" % (next,))
  452. return Attrib(selector, namespace, attrib, op, value.value)
  453. def parse_series(tokens):
  454. """
  455. Parses the arguments for :nth-child() and friends.
  456. :raises: A list of tokens
  457. :returns: :``(a, b)``
  458. """
  459. for token in tokens:
  460. if token.type == 'STRING':
  461. raise ValueError('String tokens not allowed in series.')
  462. s = ''.join(token.value for token in tokens).strip()
  463. if s == 'odd':
  464. return (2, 1)
  465. elif s == 'even':
  466. return (2, 0)
  467. elif s == 'n':
  468. return (1, 0)
  469. if 'n' not in s:
  470. # Just b
  471. return (0, int(s))
  472. a, b = s.split('n', 1)
  473. if not a:
  474. a = 1
  475. elif a == '-' or a == '+':
  476. a = int(a+'1')
  477. else:
  478. a = int(a)
  479. if not b:
  480. b = 0
  481. else:
  482. b = int(b)
  483. return (a, b)
  484. #### Token objects
  485. class Token(tuple):
  486. def __new__(cls, type_, value, pos):
  487. obj = tuple.__new__(cls, (type_, value))
  488. obj.pos = pos
  489. return obj
  490. def __repr__(self):
  491. return "<%s '%s' at %i>" % (self.type, self.value, self.pos)
  492. def is_delim(self, *values):
  493. return self.type == 'DELIM' and self.value in values
  494. type = property(operator.itemgetter(0))
  495. value = property(operator.itemgetter(1))
  496. class EOFToken(Token):
  497. def __new__(cls, pos):
  498. return Token.__new__(cls, 'EOF', None, pos)
  499. def __repr__(self):
  500. return '<%s at %i>' % (self.type, self.pos)
  501. #### Tokenizer
  502. class TokenMacros:
  503. unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
  504. escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
  505. string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
  506. nonascii = r'[^\0-\177]'
  507. nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
  508. nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
  509. def _compile(pattern):
  510. return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
  511. _match_whitespace = _compile(r'[ \t\r\n\f]+')
  512. _match_number = _compile('[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
  513. _match_hash = _compile('#(?:%(nmchar)s)+')
  514. _match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
  515. _match_string_by_quote = {
  516. "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
  517. '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
  518. }
  519. _sub_simple_escape = re.compile(r'\\(.)').sub
  520. _sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
  521. _sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
  522. # Same as r'\1', but faster on CPython
  523. if hasattr(operator, 'methodcaller'):
  524. # Python 2.6+
  525. _replace_simple = operator.methodcaller('group', 1)
  526. else:
  527. def _replace_simple(match):
  528. return match.group(1)
  529. def _replace_unicode(match):
  530. codepoint = int(match.group(1), 16)
  531. if codepoint > sys.maxunicode:
  532. codepoint = 0xFFFD
  533. return _unichr(codepoint)
  534. def unescape_ident(value):
  535. value = _sub_unicode_escape(_replace_unicode, value)
  536. value = _sub_simple_escape(_replace_simple, value)
  537. return value
  538. def tokenize(s):
  539. pos = 0
  540. len_s = len(s)
  541. while pos < len_s:
  542. match = _match_whitespace(s, pos=pos)
  543. if match:
  544. yield Token('S', ' ', pos)
  545. pos = match.end()
  546. continue
  547. match = _match_ident(s, pos=pos)
  548. if match:
  549. value = _sub_simple_escape(_replace_simple,
  550. _sub_unicode_escape(_replace_unicode, match.group()))
  551. yield Token('IDENT', value, pos)
  552. pos = match.end()
  553. continue
  554. match = _match_hash(s, pos=pos)
  555. if match:
  556. value = _sub_simple_escape(_replace_simple,
  557. _sub_unicode_escape(_replace_unicode, match.group()[1:]))
  558. yield Token('HASH', value, pos)
  559. pos = match.end()
  560. continue
  561. quote = s[pos]
  562. if quote in _match_string_by_quote:
  563. match = _match_string_by_quote[quote](s, pos=pos + 1)
  564. assert match, 'Should have found at least an empty match'
  565. end_pos = match.end()
  566. if end_pos == len_s:
  567. raise SelectorSyntaxError('Unclosed string at %s' % pos)
  568. if s[end_pos] != quote:
  569. raise SelectorSyntaxError('Invalid string at %s' % pos)
  570. value = _sub_simple_escape(_replace_simple,
  571. _sub_unicode_escape(_replace_unicode,
  572. _sub_newline_escape('', match.group())))
  573. yield Token('STRING', value, pos)
  574. pos = end_pos + 1
  575. continue
  576. match = _match_number(s, pos=pos)
  577. if match:
  578. value = match.group()
  579. yield Token('NUMBER', value, pos)
  580. pos = match.end()
  581. continue
  582. pos2 = pos + 2
  583. if s[pos:pos2] == '/*':
  584. pos = s.find('*/', pos2)
  585. if pos == -1:
  586. pos = len_s
  587. else:
  588. pos += 2
  589. continue
  590. yield Token('DELIM', s[pos], pos)
  591. pos += 1
  592. assert pos == len_s
  593. yield EOFToken(pos)
  594. class TokenStream(object):
  595. def __init__(self, tokens, source=None):
  596. self.used = []
  597. self.tokens = iter(tokens)
  598. self.source = source
  599. self.peeked = None
  600. self._peeking = False
  601. try:
  602. self.next_token = self.tokens.next
  603. except AttributeError:
  604. # Python 3
  605. self.next_token = self.tokens.__next__
  606. def next(self):
  607. if self._peeking:
  608. self._peeking = False
  609. self.used.append(self.peeked)
  610. return self.peeked
  611. else:
  612. next = self.next_token()
  613. self.used.append(next)
  614. return next
  615. def peek(self):
  616. if not self._peeking:
  617. self.peeked = self.next_token()
  618. self._peeking = True
  619. return self.peeked
  620. def next_ident(self):
  621. next = self.next()
  622. if next.type != 'IDENT':
  623. raise SelectorSyntaxError('Expected ident, got %s' % (next,))
  624. return next.value
  625. def next_ident_or_star(self):
  626. next = self.next()
  627. if next.type == 'IDENT':
  628. return next.value
  629. elif next == ('DELIM', '*'):
  630. return None
  631. else:
  632. raise SelectorSyntaxError(
  633. "Expected ident or '*', got %s" % (next,))
  634. def skip_whitespace(self):
  635. peek = self.peek()
  636. if peek.type == 'S':
  637. self.next()