Scanning.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. # cython: infer_types=True, language_level=3, py2_import=True, auto_pickle=False
  2. #
  3. # Cython Scanner
  4. #
  5. from __future__ import absolute_import
  6. import cython
  7. cython.declare(make_lexicon=object, lexicon=object,
  8. print_function=object, error=object, warning=object,
  9. os=object, platform=object)
  10. import os
  11. import platform
  12. from .. import Utils
  13. from ..Plex.Scanners import Scanner
  14. from ..Plex.Errors import UnrecognizedInput
  15. from .Errors import error, warning
  16. from .Lexicon import any_string_prefix, make_lexicon, IDENT
  17. from .Future import print_function
# Module-level scanner debugging switches and the shared lexicon cache.

# NOTE(review): only mentioned in a disabled trace block in
# PyrexScanner.next() within this file -- confirm whether anything else reads it.
debug_scanner = 0
# Copied into PyrexScanner.trace whenever a scanner is constructed.
trace_scanner = 0
# NOTE(review): not read anywhere in the visible part of this file --
# presumably consumed by other modules; verify before removing.
scanner_debug_flags = 0
scanner_dump_file = None
# Cache for the lexicon; filled in on the first call to get_lexicon().
lexicon = None
  23. def get_lexicon():
  24. global lexicon
  25. if not lexicon:
  26. lexicon = make_lexicon()
  27. return lexicon
  28. #------------------------------------------------------------------
  29. py_reserved_words = [
  30. "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
  31. "continue", "return", "raise", "import", "exec", "try",
  32. "except", "finally", "while", "if", "elif", "else", "for",
  33. "in", "assert", "and", "or", "not", "is", "in", "lambda",
  34. "from", "yield", "with", "nonlocal",
  35. ]
  36. pyx_reserved_words = py_reserved_words + [
  37. "include", "ctypedef", "cdef", "cpdef",
  38. "cimport", "DEF", "IF", "ELIF", "ELSE"
  39. ]
  40. class Method(object):
  41. def __init__(self, name, **kwargs):
  42. self.name = name
  43. self.kwargs = kwargs or None
  44. self.__name__ = name # for Plex tracing
  45. def __call__(self, stream, text):
  46. method = getattr(stream, self.name)
  47. # self.kwargs is almost always unused => avoid call overhead
  48. return method(text, **self.kwargs) if self.kwargs is not None else method(text)
  49. def __copy__(self):
  50. return self # immutable, no need to copy
  51. def __deepcopy__(self, memo):
  52. return self # immutable, no need to copy
  53. #------------------------------------------------------------------
  54. class CompileTimeScope(object):
  55. def __init__(self, outer=None):
  56. self.entries = {}
  57. self.outer = outer
  58. def declare(self, name, value):
  59. self.entries[name] = value
  60. def update(self, other):
  61. self.entries.update(other)
  62. def lookup_here(self, name):
  63. return self.entries[name]
  64. def __contains__(self, name):
  65. return name in self.entries
  66. def lookup(self, name):
  67. try:
  68. return self.lookup_here(name)
  69. except KeyError:
  70. outer = self.outer
  71. if outer:
  72. return outer.lookup(name)
  73. else:
  74. raise
  75. def initial_compile_time_env():
  76. benv = CompileTimeScope()
  77. names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE', 'UNAME_VERSION', 'UNAME_MACHINE')
  78. for name, value in zip(names, platform.uname()):
  79. benv.declare(name, value)
  80. try:
  81. import __builtin__ as builtins
  82. except ImportError:
  83. import builtins
  84. names = (
  85. 'False', 'True',
  86. 'abs', 'all', 'any', 'ascii', 'bin', 'bool', 'bytearray', 'bytes',
  87. 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate', 'filter',
  88. 'float', 'format', 'frozenset', 'hash', 'hex', 'int', 'len',
  89. 'list', 'map', 'max', 'min', 'oct', 'ord', 'pow', 'range',
  90. 'repr', 'reversed', 'round', 'set', 'slice', 'sorted', 'str',
  91. 'sum', 'tuple', 'zip',
  92. ### defined below in a platform independent way
  93. # 'long', 'unicode', 'reduce', 'xrange'
  94. )
  95. for name in names:
  96. try:
  97. benv.declare(name, getattr(builtins, name))
  98. except AttributeError:
  99. # ignore, likely Py3
  100. pass
  101. # Py2/3 adaptations
  102. from functools import reduce
  103. benv.declare('reduce', reduce)
  104. benv.declare('unicode', getattr(builtins, 'unicode', getattr(builtins, 'str')))
  105. benv.declare('long', getattr(builtins, 'long', getattr(builtins, 'int')))
  106. benv.declare('xrange', getattr(builtins, 'xrange', getattr(builtins, 'range')))
  107. denv = CompileTimeScope(benv)
  108. return denv
  109. #------------------------------------------------------------------
  110. class SourceDescriptor(object):
  111. """
  112. A SourceDescriptor should be considered immutable.
  113. """
  114. _file_type = 'pyx'
  115. _escaped_description = None
  116. _cmp_name = ''
  117. def __str__(self):
  118. assert False # To catch all places where a descriptor is used directly as a filename
  119. def set_file_type_from_name(self, filename):
  120. name, ext = os.path.splitext(filename)
  121. self._file_type = ext in ('.pyx', '.pxd', '.py') and ext[1:] or 'pyx'
  122. def is_cython_file(self):
  123. return self._file_type in ('pyx', 'pxd')
  124. def is_python_file(self):
  125. return self._file_type == 'py'
  126. def get_escaped_description(self):
  127. if self._escaped_description is None:
  128. esc_desc = \
  129. self.get_description().encode('ASCII', 'replace').decode("ASCII")
  130. # Use forward slashes on Windows since these paths
  131. # will be used in the #line directives in the C/C++ files.
  132. self._escaped_description = esc_desc.replace('\\', '/')
  133. return self._escaped_description
  134. def __gt__(self, other):
  135. # this is only used to provide some sort of order
  136. try:
  137. return self._cmp_name > other._cmp_name
  138. except AttributeError:
  139. return False
  140. def __lt__(self, other):
  141. # this is only used to provide some sort of order
  142. try:
  143. return self._cmp_name < other._cmp_name
  144. except AttributeError:
  145. return False
  146. def __le__(self, other):
  147. # this is only used to provide some sort of order
  148. try:
  149. return self._cmp_name <= other._cmp_name
  150. except AttributeError:
  151. return False
  152. def __copy__(self):
  153. return self # immutable, no need to copy
  154. def __deepcopy__(self, memo):
  155. return self # immutable, no need to copy
  156. class FileSourceDescriptor(SourceDescriptor):
  157. """
  158. Represents a code source. A code source is a more generic abstraction
  159. for a "filename" (as sometimes the code doesn't come from a file).
  160. Instances of code sources are passed to Scanner.__init__ as the
  161. optional name argument and will be passed back when asking for
  162. the position()-tuple.
  163. """
  164. def __init__(self, filename, path_description=None):
  165. filename = Utils.decode_filename(filename)
  166. self.path_description = path_description or filename
  167. self.filename = filename
  168. # Prefer relative paths to current directory (which is most likely the project root) over absolute paths.
  169. workdir = os.path.abspath('.') + os.sep
  170. self.file_path = filename[len(workdir):] if filename.startswith(workdir) else filename
  171. self.set_file_type_from_name(filename)
  172. self._cmp_name = filename
  173. self._lines = {}
  174. def get_lines(self, encoding=None, error_handling=None):
  175. # we cache the lines only the second time this is called, in
  176. # order to save memory when they are only used once
  177. key = (encoding, error_handling)
  178. try:
  179. lines = self._lines[key]
  180. if lines is not None:
  181. return lines
  182. except KeyError:
  183. pass
  184. with Utils.open_source_file(self.filename, encoding=encoding, error_handling=error_handling) as f:
  185. lines = list(f)
  186. if key in self._lines:
  187. self._lines[key] = lines
  188. else:
  189. # do not cache the first access, but remember that we
  190. # already read it once
  191. self._lines[key] = None
  192. return lines
  193. def get_description(self):
  194. try:
  195. return os.path.relpath(self.path_description)
  196. except ValueError:
  197. # path not under current directory => use complete file path
  198. return self.path_description
  199. def get_error_description(self):
  200. path = self.filename
  201. cwd = Utils.decode_filename(os.getcwd() + os.path.sep)
  202. if path.startswith(cwd):
  203. return path[len(cwd):]
  204. return path
  205. def get_filenametable_entry(self):
  206. return self.file_path
  207. def __eq__(self, other):
  208. return isinstance(other, FileSourceDescriptor) and self.filename == other.filename
  209. def __hash__(self):
  210. return hash(self.filename)
  211. def __repr__(self):
  212. return "<FileSourceDescriptor:%s>" % self.filename
  213. class StringSourceDescriptor(SourceDescriptor):
  214. """
  215. Instances of this class can be used instead of a filenames if the
  216. code originates from a string object.
  217. """
  218. filename = None
  219. def __init__(self, name, code):
  220. self.name = name
  221. #self.set_file_type_from_name(name)
  222. self.codelines = [x + "\n" for x in code.split("\n")]
  223. self._cmp_name = name
  224. def get_lines(self, encoding=None, error_handling=None):
  225. if not encoding:
  226. return self.codelines
  227. else:
  228. return [line.encode(encoding, error_handling).decode(encoding)
  229. for line in self.codelines]
  230. def get_description(self):
  231. return self.name
  232. get_error_description = get_description
  233. def get_filenametable_entry(self):
  234. return "stringsource"
  235. def __hash__(self):
  236. return id(self)
  237. # Do not hash on the name, an identical string source should be the
  238. # same object (name is often defaulted in other places)
  239. # return hash(self.name)
  240. def __eq__(self, other):
  241. return isinstance(other, StringSourceDescriptor) and self.name == other.name
  242. def __repr__(self):
  243. return "<StringSourceDescriptor:%s>" % self.name
  244. #------------------------------------------------------------------
  245. class PyrexScanner(Scanner):
  246. # context Context Compilation context
  247. # included_files [string] Files included with 'include' statement
  248. # compile_time_env dict Environment for conditional compilation
  249. # compile_time_eval boolean In a true conditional compilation context
  250. # compile_time_expr boolean In a compile-time expression context
  251. def __init__(self, file, filename, parent_scanner=None,
  252. scope=None, context=None, source_encoding=None, parse_comments=True, initial_pos=None):
  253. Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
  254. if parent_scanner:
  255. self.context = parent_scanner.context
  256. self.included_files = parent_scanner.included_files
  257. self.compile_time_env = parent_scanner.compile_time_env
  258. self.compile_time_eval = parent_scanner.compile_time_eval
  259. self.compile_time_expr = parent_scanner.compile_time_expr
  260. else:
  261. self.context = context
  262. self.included_files = scope.included_files
  263. self.compile_time_env = initial_compile_time_env()
  264. self.compile_time_eval = 1
  265. self.compile_time_expr = 0
  266. if getattr(context.options, 'compile_time_env', None):
  267. self.compile_time_env.update(context.options.compile_time_env)
  268. self.parse_comments = parse_comments
  269. self.source_encoding = source_encoding
  270. if filename.is_python_file():
  271. self.in_python_file = True
  272. self.keywords = set(py_reserved_words)
  273. else:
  274. self.in_python_file = False
  275. self.keywords = set(pyx_reserved_words)
  276. self.trace = trace_scanner
  277. self.indentation_stack = [0]
  278. self.indentation_char = None
  279. self.bracket_nesting_level = 0
  280. self.async_enabled = 0
  281. self.begin('INDENT')
  282. self.sy = ''
  283. self.next()
  284. def commentline(self, text):
  285. if self.parse_comments:
  286. self.produce('commentline', text)
  287. def strip_underscores(self, text, symbol):
  288. self.produce(symbol, text.replace('_', ''))
  289. def current_level(self):
  290. return self.indentation_stack[-1]
  291. def open_bracket_action(self, text):
  292. self.bracket_nesting_level += 1
  293. return text
  294. def close_bracket_action(self, text):
  295. self.bracket_nesting_level -= 1
  296. return text
  297. def newline_action(self, text):
  298. if self.bracket_nesting_level == 0:
  299. self.begin('INDENT')
  300. self.produce('NEWLINE', '')
  301. string_states = {
  302. "'": 'SQ_STRING',
  303. '"': 'DQ_STRING',
  304. "'''": 'TSQ_STRING',
  305. '"""': 'TDQ_STRING'
  306. }
  307. def begin_string_action(self, text):
  308. while text[:1] in any_string_prefix:
  309. text = text[1:]
  310. self.begin(self.string_states[text])
  311. self.produce('BEGIN_STRING')
  312. def end_string_action(self, text):
  313. self.begin('')
  314. self.produce('END_STRING')
  315. def unclosed_string_action(self, text):
  316. self.end_string_action(text)
  317. self.error("Unclosed string literal")
  318. def indentation_action(self, text):
  319. self.begin('')
  320. # Indentation within brackets should be ignored.
  321. #if self.bracket_nesting_level > 0:
  322. # return
  323. # Check that tabs and spaces are being used consistently.
  324. if text:
  325. c = text[0]
  326. #print "Scanner.indentation_action: indent with", repr(c) ###
  327. if self.indentation_char is None:
  328. self.indentation_char = c
  329. #print "Scanner.indentation_action: setting indent_char to", repr(c)
  330. else:
  331. if self.indentation_char != c:
  332. self.error("Mixed use of tabs and spaces")
  333. if text.replace(c, "") != "":
  334. self.error("Mixed use of tabs and spaces")
  335. # Figure out how many indents/dedents to do
  336. current_level = self.current_level()
  337. new_level = len(text)
  338. #print "Changing indent level from", current_level, "to", new_level ###
  339. if new_level == current_level:
  340. return
  341. elif new_level > current_level:
  342. #print "...pushing level", new_level ###
  343. self.indentation_stack.append(new_level)
  344. self.produce('INDENT', '')
  345. else:
  346. while new_level < self.current_level():
  347. #print "...popping level", self.indentation_stack[-1] ###
  348. self.indentation_stack.pop()
  349. self.produce('DEDENT', '')
  350. #print "...current level now", self.current_level() ###
  351. if new_level != self.current_level():
  352. self.error("Inconsistent indentation")
  353. def eof_action(self, text):
  354. while len(self.indentation_stack) > 1:
  355. self.produce('DEDENT', '')
  356. self.indentation_stack.pop()
  357. self.produce('EOF', '')
  358. def next(self):
  359. try:
  360. sy, systring = self.read()
  361. except UnrecognizedInput:
  362. self.error("Unrecognized character")
  363. return # just a marker, error() always raises
  364. if sy == IDENT:
  365. if systring in self.keywords:
  366. if systring == u'print' and print_function in self.context.future_directives:
  367. self.keywords.discard('print')
  368. elif systring == u'exec' and self.context.language_level >= 3:
  369. self.keywords.discard('exec')
  370. else:
  371. sy = systring
  372. systring = self.context.intern_ustring(systring)
  373. self.sy = sy
  374. self.systring = systring
  375. if False: # debug_scanner:
  376. _, line, col = self.position()
  377. if not self.systring or self.sy == self.systring:
  378. t = self.sy
  379. else:
  380. t = "%s %s" % (self.sy, self.systring)
  381. print("--- %3d %2d %s" % (line, col, t))
  382. def peek(self):
  383. saved = self.sy, self.systring
  384. self.next()
  385. next = self.sy, self.systring
  386. self.unread(*next)
  387. self.sy, self.systring = saved
  388. return next
  389. def put_back(self, sy, systring):
  390. self.unread(self.sy, self.systring)
  391. self.sy = sy
  392. self.systring = systring
  393. def unread(self, token, value):
  394. # This method should be added to Plex
  395. self.queue.insert(0, (token, value))
  396. def error(self, message, pos=None, fatal=True):
  397. if pos is None:
  398. pos = self.position()
  399. if self.sy == 'INDENT':
  400. error(pos, "Possible inconsistent indentation")
  401. err = error(pos, message)
  402. if fatal: raise err
  403. def expect(self, what, message=None):
  404. if self.sy == what:
  405. self.next()
  406. else:
  407. self.expected(what, message)
  408. def expect_keyword(self, what, message=None):
  409. if self.sy == IDENT and self.systring == what:
  410. self.next()
  411. else:
  412. self.expected(what, message)
  413. def expected(self, what, message=None):
  414. if message:
  415. self.error(message)
  416. else:
  417. if self.sy == IDENT:
  418. found = self.systring
  419. else:
  420. found = self.sy
  421. self.error("Expected '%s', found '%s'" % (what, found))
  422. def expect_indent(self):
  423. self.expect('INDENT', "Expected an increase in indentation level")
  424. def expect_dedent(self):
  425. self.expect('DEDENT', "Expected a decrease in indentation level")
  426. def expect_newline(self, message="Expected a newline", ignore_semicolon=False):
  427. # Expect either a newline or end of file
  428. useless_trailing_semicolon = None
  429. if ignore_semicolon and self.sy == ';':
  430. useless_trailing_semicolon = self.position()
  431. self.next()
  432. if self.sy != 'EOF':
  433. self.expect('NEWLINE', message)
  434. if useless_trailing_semicolon is not None:
  435. warning(useless_trailing_semicolon, "useless trailing semicolon")
  436. def enter_async(self):
  437. self.async_enabled += 1
  438. if self.async_enabled == 1:
  439. self.keywords.add('async')
  440. self.keywords.add('await')
  441. def exit_async(self):
  442. assert self.async_enabled > 0
  443. self.async_enabled -= 1
  444. if not self.async_enabled:
  445. self.keywords.discard('await')
  446. self.keywords.discard('async')
  447. if self.sy in ('async', 'await'):
  448. self.sy, self.systring = IDENT, self.context.intern_ustring(self.sy)