php.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.php
  4. ~~~~~~~~~~~~~~~~~~~
  5. Lexers for PHP and related languages.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, include, bygroups, default, using, \
  11. this, words
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Other
  14. from pygments.util import get_bool_opt, get_list_opt, iteritems
# Public names exported by ``from <this module> import *``: the two lexer
# classes defined below.
__all__ = ['ZephirLexer', 'PhpLexer']
  16. class ZephirLexer(RegexLexer):
  17. """
  18. For `Zephir language <http://zephir-lang.com/>`_ source code.
  19. Zephir is a compiled high level language aimed
  20. to the creation of C-extensions for PHP.
  21. .. versionadded:: 2.0
  22. """
  23. name = 'Zephir'
  24. aliases = ['zephir']
  25. filenames = ['*.zep']
  26. zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
  27. zephir_type = ['bit', 'bits', 'string']
  28. flags = re.DOTALL | re.MULTILINE
  29. tokens = {
  30. 'commentsandwhitespace': [
  31. (r'\s+', Text),
  32. (r'//.*?\n', Comment.Single),
  33. (r'/\*.*?\*/', Comment.Multiline)
  34. ],
  35. 'slashstartsregex': [
  36. include('commentsandwhitespace'),
  37. (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
  38. r'([gim]+\b|\B)', String.Regex, '#pop'),
  39. default('#pop')
  40. ],
  41. 'badregex': [
  42. (r'\n', Text, '#pop')
  43. ],
  44. 'root': [
  45. (r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
  46. include('commentsandwhitespace'),
  47. (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
  48. r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
  49. (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
  50. (r'[})\].]', Punctuation),
  51. (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
  52. r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
  53. r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
  54. r'empty)\b', Keyword, 'slashstartsregex'),
  55. (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
  56. (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
  57. r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
  58. r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
  59. r'transient|volatile)\b', Keyword.Reserved),
  60. (r'(true|false|null|undefined)\b', Keyword.Constant),
  61. (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
  62. r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
  63. r'window)\b', Name.Builtin),
  64. (r'[$a-zA-Z_][\w\\]*', Name.Other),
  65. (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
  66. (r'0x[0-9a-fA-F]+', Number.Hex),
  67. (r'[0-9]+', Number.Integer),
  68. (r'"(\\\\|\\"|[^"])*"', String.Double),
  69. (r"'(\\\\|\\'|[^'])*'", String.Single),
  70. ]
  71. }
  72. class PhpLexer(RegexLexer):
  73. """
  74. For `PHP <http://www.php.net/>`_ source code.
  75. For PHP embedded in HTML, use the `HtmlPhpLexer`.
  76. Additional options accepted:
  77. `startinline`
  78. If given and ``True`` the lexer starts highlighting with
  79. php code (i.e.: no starting ``<?php`` required). The default
  80. is ``False``.
  81. `funcnamehighlighting`
  82. If given and ``True``, highlight builtin function names
  83. (default: ``True``).
  84. `disabledmodules`
  85. If given, must be a list of module names whose function names
  86. should not be highlighted. By default all modules are highlighted
  87. except the special ``'unknown'`` module that includes functions
  88. that are known to php but are undocumented.
  89. To get a list of allowed modules have a look into the
  90. `_php_builtins` module:
  91. .. sourcecode:: pycon
  92. >>> from pygments.lexers._php_builtins import MODULES
  93. >>> MODULES.keys()
  94. ['PHP Options/Info', 'Zip', 'dba', ...]
  95. In fact the names of those modules match the module names from
  96. the php documentation.
  97. """
  98. name = 'PHP'
  99. aliases = ['php', 'php3', 'php4', 'php5']
  100. filenames = ['*.php', '*.php[345]', '*.inc']
  101. mimetypes = ['text/x-php']
  102. # Note that a backslash is included in the following two patterns
  103. # PHP uses a backslash as a namespace separator
  104. _ident_char = r'[\\\w]|[^\x00-\x7f]'
  105. _ident_begin = r'(?:[\\_a-z]|[^\x00-\x7f])'
  106. _ident_end = r'(?:' + _ident_char + ')*'
  107. _ident_inner = _ident_begin + _ident_end
  108. flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
  109. tokens = {
  110. 'root': [
  111. (r'<\?(php)?', Comment.Preproc, 'php'),
  112. (r'[^<]+', Other),
  113. (r'<', Other)
  114. ],
  115. 'php': [
  116. (r'\?>', Comment.Preproc, '#pop'),
  117. (r'(<<<)([\'"]?)(' + _ident_inner + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
  118. bygroups(String, String, String.Delimiter, String, String.Delimiter,
  119. Punctuation, Text)),
  120. (r'\s+', Text),
  121. (r'#.*?\n', Comment.Single),
  122. (r'//.*?\n', Comment.Single),
  123. # put the empty comment here, it is otherwise seen as
  124. # the start of a docstring
  125. (r'/\*\*/', Comment.Multiline),
  126. (r'/\*\*.*?\*/', String.Doc),
  127. (r'/\*.*?\*/', Comment.Multiline),
  128. (r'(->|::)(\s*)(' + _ident_inner + ')',
  129. bygroups(Operator, Text, Name.Attribute)),
  130. (r'[~!%^&*+=|:.<>/@-]+', Operator),
  131. (r'\?', Operator), # don't add to the charclass above!
  132. (r'[\[\]{}();,]+', Punctuation),
  133. (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
  134. (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
  135. (r'(function)(\s+)(&?)(\s*)',
  136. bygroups(Keyword, Text, Operator, Text), 'functionname'),
  137. (r'(const)(\s+)(' + _ident_inner + ')',
  138. bygroups(Keyword, Text, Name.Constant)),
  139. (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
  140. r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
  141. r'FALSE|print|for|require|continue|foreach|require_once|'
  142. r'declare|return|default|static|do|switch|die|stdClass|'
  143. r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
  144. r'virtual|endfor|include_once|while|endforeach|global|'
  145. r'endif|list|endswitch|new|endwhile|not|'
  146. r'array|E_ALL|NULL|final|php_user_filter|interface|'
  147. r'implements|public|private|protected|abstract|clone|try|'
  148. r'catch|throw|this|use|namespace|trait|yield|'
  149. r'finally)\b', Keyword),
  150. (r'(true|false|null)\b', Keyword.Constant),
  151. include('magicconstants'),
  152. (r'\$\{\$+' + _ident_inner + '\}', Name.Variable),
  153. (r'\$+' + _ident_inner, Name.Variable),
  154. (_ident_inner, Name.Other),
  155. (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
  156. (r'\d+e[+-]?[0-9]+', Number.Float),
  157. (r'0[0-7]+', Number.Oct),
  158. (r'0x[a-f0-9]+', Number.Hex),
  159. (r'\d+', Number.Integer),
  160. (r'0b[01]+', Number.Bin),
  161. (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
  162. (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
  163. (r'"', String.Double, 'string'),
  164. ],
  165. 'magicfuncs': [
  166. # source: http://php.net/manual/en/language.oop5.magic.php
  167. (words((
  168. '__construct', '__destruct', '__call', '__callStatic', '__get', '__set',
  169. '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke',
  170. '__set_state', '__clone', '__debugInfo',), suffix=r'\b'),
  171. Name.Function.Magic),
  172. ],
  173. 'magicconstants': [
  174. # source: http://php.net/manual/en/language.constants.predefined.php
  175. (words((
  176. '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__',
  177. '__TRAIT__', '__METHOD__', '__NAMESPACE__',),
  178. suffix=r'\b'),
  179. Name.Constant),
  180. ],
  181. 'classname': [
  182. (_ident_inner, Name.Class, '#pop')
  183. ],
  184. 'functionname': [
  185. include('magicfuncs'),
  186. (_ident_inner, Name.Function, '#pop'),
  187. default('#pop')
  188. ],
  189. 'string': [
  190. (r'"', String.Double, '#pop'),
  191. (r'[^{$"\\]+', String.Double),
  192. (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
  193. (r'\$' + _ident_inner + '(\[\S+?\]|->' + _ident_inner + ')?',
  194. String.Interpol),
  195. (r'(\{\$\{)(.*?)(\}\})',
  196. bygroups(String.Interpol, using(this, _startinline=True),
  197. String.Interpol)),
  198. (r'(\{)(\$.*?)(\})',
  199. bygroups(String.Interpol, using(this, _startinline=True),
  200. String.Interpol)),
  201. (r'(\$\{)(\S+)(\})',
  202. bygroups(String.Interpol, Name.Variable, String.Interpol)),
  203. (r'[${\\]', String.Double)
  204. ],
  205. }
  206. def __init__(self, **options):
  207. self.funcnamehighlighting = get_bool_opt(
  208. options, 'funcnamehighlighting', True)
  209. self.disabledmodules = get_list_opt(
  210. options, 'disabledmodules', ['unknown'])
  211. self.startinline = get_bool_opt(options, 'startinline', False)
  212. # private option argument for the lexer itself
  213. if '_startinline' in options:
  214. self.startinline = options.pop('_startinline')
  215. # collect activated functions in a set
  216. self._functions = set()
  217. if self.funcnamehighlighting:
  218. from pygments.lexers._php_builtins import MODULES
  219. for key, value in iteritems(MODULES):
  220. if key not in self.disabledmodules:
  221. self._functions.update(value)
  222. RegexLexer.__init__(self, **options)
  223. def get_tokens_unprocessed(self, text):
  224. stack = ['root']
  225. if self.startinline:
  226. stack.append('php')
  227. for index, token, value in \
  228. RegexLexer.get_tokens_unprocessed(self, text, stack):
  229. if token is Name.Other:
  230. if value in self._functions:
  231. yield index, Name.Builtin, value
  232. continue
  233. yield index, token, value
  234. def analyse_text(text):
  235. rv = 0.0
  236. if re.search(r'<\?(?!xml)', text):
  237. rv += 0.3
  238. return rv