julia.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.julia
  4. ~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for the Julia language.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
  11. words, include
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic
  14. from pygments.util import shebang_matches, unirange
  15. __all__ = ['JuliaLexer', 'JuliaConsoleLexer']
  16. allowed_variable = (
  17. u'(?:[a-zA-Z_\u00A1-\uffff]|%s)(?:[a-zA-Z_0-9\u00A1-\uffff]|%s)*!*' %
  18. ((unirange(0x10000, 0x10ffff),) * 2))
  19. class JuliaLexer(RegexLexer):
  20. """
  21. For `Julia <http://julialang.org/>`_ source code.
  22. .. versionadded:: 1.6
  23. """
  24. name = 'Julia'
  25. aliases = ['julia', 'jl']
  26. filenames = ['*.jl']
  27. mimetypes = ['text/x-julia', 'application/x-julia']
  28. flags = re.MULTILINE | re.UNICODE
  29. tokens = {
  30. 'root': [
  31. (r'\n', Text),
  32. (r'[^\S\n]+', Text),
  33. (r'#=', Comment.Multiline, "blockcomment"),
  34. (r'#.*$', Comment),
  35. (r'[\[\]{}(),;]', Punctuation),
  36. # keywords
  37. (r'in\b', Keyword.Pseudo),
  38. (r'(true|false)\b', Keyword.Constant),
  39. (r'(local|global|const)\b', Keyword.Declaration),
  40. (words([
  41. 'function', 'type', 'typealias', 'abstract', 'immutable',
  42. 'baremodule', 'begin', 'bitstype', 'break', 'catch', 'ccall',
  43. 'continue', 'do', 'else', 'elseif', 'end', 'export', 'finally',
  44. 'for', 'if', 'import', 'importall', 'let', 'macro', 'module',
  45. 'quote', 'return', 'try', 'using', 'while'],
  46. suffix=r'\b'), Keyword),
  47. # NOTE
  48. # Patterns below work only for definition sites and thus hardly reliable.
  49. #
  50. # functions
  51. # (r'(function)(\s+)(' + allowed_variable + ')',
  52. # bygroups(Keyword, Text, Name.Function)),
  53. #
  54. # types
  55. # (r'(type|typealias|abstract|immutable)(\s+)(' + allowed_variable + ')',
  56. # bygroups(Keyword, Text, Name.Class)),
  57. # type names
  58. (words([
  59. 'ANY', 'ASCIIString', 'AbstractArray', 'AbstractChannel',
  60. 'AbstractFloat', 'AbstractMatrix', 'AbstractRNG',
  61. 'AbstractSparseArray', 'AbstractSparseMatrix',
  62. 'AbstractSparseVector', 'AbstractString', 'AbstractVecOrMat',
  63. 'AbstractVector', 'Any', 'ArgumentError', 'Array',
  64. 'AssertionError', 'Associative', 'Base64DecodePipe',
  65. 'Base64EncodePipe', 'Bidiagonal', 'BigFloat', 'BigInt',
  66. 'BitArray', 'BitMatrix', 'BitVector', 'Bool', 'BoundsError',
  67. 'Box', 'BufferStream', 'CapturedException', 'CartesianIndex',
  68. 'CartesianRange', 'Cchar', 'Cdouble', 'Cfloat', 'Channel',
  69. 'Char', 'Cint', 'Cintmax_t', 'Clong', 'Clonglong',
  70. 'ClusterManager', 'Cmd', 'Coff_t', 'Colon', 'Complex',
  71. 'Complex128', 'Complex32', 'Complex64', 'CompositeException',
  72. 'Condition', 'Cptrdiff_t', 'Cshort', 'Csize_t', 'Cssize_t',
  73. 'Cstring', 'Cuchar', 'Cuint', 'Cuintmax_t', 'Culong',
  74. 'Culonglong', 'Cushort', 'Cwchar_t', 'Cwstring', 'DataType',
  75. 'Date', 'DateTime', 'DenseArray', 'DenseMatrix',
  76. 'DenseVecOrMat', 'DenseVector', 'Diagonal', 'Dict',
  77. 'DimensionMismatch', 'Dims', 'DirectIndexString', 'Display',
  78. 'DivideError', 'DomainError', 'EOFError', 'EachLine', 'Enum',
  79. 'Enumerate', 'ErrorException', 'Exception', 'Expr',
  80. 'Factorization', 'FileMonitor', 'FileOffset', 'Filter',
  81. 'Float16', 'Float32', 'Float64', 'FloatRange', 'Function',
  82. 'GenSym', 'GlobalRef', 'GotoNode', 'HTML', 'Hermitian', 'IO',
  83. 'IOBuffer', 'IOStream', 'IPv4', 'IPv6', 'InexactError',
  84. 'InitError', 'Int', 'Int128', 'Int16', 'Int32', 'Int64', 'Int8',
  85. 'IntSet', 'Integer', 'InterruptException', 'IntrinsicFunction',
  86. 'InvalidStateException', 'Irrational', 'KeyError', 'LabelNode',
  87. 'LambdaStaticData', 'LinSpace', 'LineNumberNode', 'LoadError',
  88. 'LocalProcess', 'LowerTriangular', 'MIME', 'Matrix',
  89. 'MersenneTwister', 'Method', 'MethodError', 'MethodTable',
  90. 'Module', 'NTuple', 'NewvarNode', 'NullException', 'Nullable',
  91. 'Number', 'ObjectIdDict', 'OrdinalRange', 'OutOfMemoryError',
  92. 'OverflowError', 'Pair', 'ParseError', 'PartialQuickSort',
  93. 'Pipe', 'PollingFileWatcher', 'ProcessExitedException',
  94. 'ProcessGroup', 'Ptr', 'QuoteNode', 'RandomDevice', 'Range',
  95. 'Rational', 'RawFD', 'ReadOnlyMemoryError', 'Real',
  96. 'ReentrantLock', 'Ref', 'Regex', 'RegexMatch',
  97. 'RemoteException', 'RemoteRef', 'RepString', 'RevString',
  98. 'RopeString', 'RoundingMode', 'SegmentationFault',
  99. 'SerializationState', 'Set', 'SharedArray', 'SharedMatrix',
  100. 'SharedVector', 'Signed', 'SimpleVector', 'SparseMatrixCSC',
  101. 'StackOverflowError', 'StatStruct', 'StepRange', 'StridedArray',
  102. 'StridedMatrix', 'StridedVecOrMat', 'StridedVector', 'SubArray',
  103. 'SubString', 'SymTridiagonal', 'Symbol', 'SymbolNode',
  104. 'Symmetric', 'SystemError', 'TCPSocket', 'Task', 'Text',
  105. 'TextDisplay', 'Timer', 'TopNode', 'Tridiagonal', 'Tuple',
  106. 'Type', 'TypeConstructor', 'TypeError', 'TypeName', 'TypeVar',
  107. 'UDPSocket', 'UInt', 'UInt128', 'UInt16', 'UInt32', 'UInt64',
  108. 'UInt8', 'UTF16String', 'UTF32String', 'UTF8String',
  109. 'UndefRefError', 'UndefVarError', 'UnicodeError', 'UniformScaling',
  110. 'Union', 'UnitRange', 'Unsigned', 'UpperTriangular', 'Val',
  111. 'Vararg', 'VecOrMat', 'Vector', 'VersionNumber', 'Void', 'WString',
  112. 'WeakKeyDict', 'WeakRef', 'WorkerConfig', 'Zip'], suffix=r'\b'),
  113. Keyword.Type),
  114. # builtins
  115. (words([
  116. u'ARGS', u'CPU_CORES', u'C_NULL', u'DevNull', u'ENDIAN_BOM',
  117. u'ENV', u'I', u'Inf', u'Inf16', u'Inf32', u'Inf64',
  118. u'InsertionSort', u'JULIA_HOME', u'LOAD_PATH', u'MergeSort',
  119. u'NaN', u'NaN16', u'NaN32', u'NaN64', u'OS_NAME',
  120. u'QuickSort', u'RoundDown', u'RoundFromZero', u'RoundNearest',
  121. u'RoundNearestTiesAway', u'RoundNearestTiesUp',
  122. u'RoundToZero', u'RoundUp', u'STDERR', u'STDIN', u'STDOUT',
  123. u'VERSION', u'WORD_SIZE', u'catalan', u'e', u'eu',
  124. u'eulergamma', u'golden', u'im', u'nothing', u'pi', u'γ',
  125. u'π', u'φ'],
  126. suffix=r'\b'), Name.Builtin),
  127. # operators
  128. # see: https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm
  129. (words([
  130. # prec-assignment
  131. u'=', u':=', u'+=', u'-=', u'*=', u'/=', u'//=', u'.//=', u'.*=', u'./=',
  132. u'\=', u'.\=', u'^=', u'.^=', u'÷=', u'.÷=', u'%=', u'.%=', u'|=', u'&=',
  133. u'$=', u'=>', u'<<=', u'>>=', u'>>>=', u'~', u'.+=', u'.-=',
  134. # prec-conditional
  135. u'?',
  136. # prec-arrow
  137. u'--', u'-->',
  138. # prec-lazy-or
  139. u'||',
  140. # prec-lazy-and
  141. u'&&',
  142. # prec-comparison
  143. u'>', u'<', u'>=', u'≥', u'<=', u'≤', u'==', u'===', u'≡', u'!=', u'≠',
  144. u'!==', u'≢', u'.>', u'.<', u'.>=', u'.≥', u'.<=', u'.≤', u'.==', u'.!=',
  145. u'.≠', u'.=', u'.!', u'<:', u'>:', u'∈', u'∉', u'∋', u'∌', u'⊆',
  146. u'⊈', u'⊂',
  147. u'⊄', u'⊊',
  148. # prec-pipe
  149. u'|>', u'<|',
  150. # prec-colon
  151. u':',
  152. # prec-plus
  153. u'+', u'-', u'.+', u'.-', u'|', u'∪', u'$',
  154. # prec-bitshift
  155. u'<<', u'>>', u'>>>', u'.<<', u'.>>', u'.>>>',
  156. # prec-times
  157. u'*', u'/', u'./', u'÷', u'.÷', u'%', u'⋅', u'.%', u'.*', u'\\', u'.\\', u'&', u'∩',
  158. # prec-rational
  159. u'//', u'.//',
  160. # prec-power
  161. u'^', u'.^',
  162. # prec-decl
  163. u'::',
  164. # prec-dot
  165. u'.',
  166. # unary op
  167. u'+', u'-', u'!', u'~', u'√', u'∛', u'∜'
  168. ]), Operator),
  169. # chars
  170. (r"'(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,3}|\\u[a-fA-F0-9]{1,4}|"
  171. r"\\U[a-fA-F0-9]{1,6}|[^\\\'\n])'", String.Char),
  172. # try to match trailing transpose
  173. (r'(?<=[.\w)\]])\'+', Operator),
  174. # strings
  175. (r'"""', String, 'tqstring'),
  176. (r'"', String, 'string'),
  177. # regular expressions
  178. (r'r"""', String.Regex, 'tqregex'),
  179. (r'r"', String.Regex, 'regex'),
  180. # backticks
  181. (r'`', String.Backtick, 'command'),
  182. # names
  183. (allowed_variable, Name),
  184. (r'@' + allowed_variable, Name.Decorator),
  185. # numbers
  186. (r'(\d+(_\d+)+\.\d*|\d*\.\d+(_\d+)+)([eEf][+-]?[0-9]+)?', Number.Float),
  187. (r'(\d+\.\d*|\d*\.\d+)([eEf][+-]?[0-9]+)?', Number.Float),
  188. (r'\d+(_\d+)+[eEf][+-]?[0-9]+', Number.Float),
  189. (r'\d+[eEf][+-]?[0-9]+', Number.Float),
  190. (r'0b[01]+(_[01]+)+', Number.Bin),
  191. (r'0b[01]+', Number.Bin),
  192. (r'0o[0-7]+(_[0-7]+)+', Number.Oct),
  193. (r'0o[0-7]+', Number.Oct),
  194. (r'0x[a-fA-F0-9]+(_[a-fA-F0-9]+)+', Number.Hex),
  195. (r'0x[a-fA-F0-9]+', Number.Hex),
  196. (r'\d+(_\d+)+', Number.Integer),
  197. (r'\d+', Number.Integer)
  198. ],
  199. "blockcomment": [
  200. (r'[^=#]', Comment.Multiline),
  201. (r'#=', Comment.Multiline, '#push'),
  202. (r'=#', Comment.Multiline, '#pop'),
  203. (r'[=#]', Comment.Multiline),
  204. ],
  205. 'string': [
  206. (r'"', String, '#pop'),
  207. # FIXME: This escape pattern is not perfect.
  208. (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
  209. # Interpolation is defined as "$" followed by the shortest full
  210. # expression, which is something we can't parse.
  211. # Include the most common cases here: $word, and $(paren'd expr).
  212. (r'\$' + allowed_variable, String.Interpol),
  213. # (r'\$[a-zA-Z_]+', String.Interpol),
  214. (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
  215. # @printf and @sprintf formats
  216. (r'%[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?[hlL]?[E-GXc-giorsux%]',
  217. String.Interpol),
  218. (r'.|\s', String),
  219. ],
  220. 'tqstring': [
  221. (r'"""', String, '#pop'),
  222. (r'\\([\\"\'$nrbtfav]|(x|u|U)[a-fA-F0-9]+|\d+)', String.Escape),
  223. (r'\$' + allowed_variable, String.Interpol),
  224. (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
  225. (r'.|\s', String),
  226. ],
  227. 'regex': [
  228. (r'"', String.Regex, '#pop'),
  229. (r'\\"', String.Regex),
  230. (r'.|\s', String.Regex),
  231. ],
  232. 'tqregex': [
  233. (r'"""', String.Regex, '#pop'),
  234. (r'.|\s', String.Regex),
  235. ],
  236. 'command': [
  237. (r'`', String.Backtick, '#pop'),
  238. (r'\$' + allowed_variable, String.Interpol),
  239. (r'(\$)(\()', bygroups(String.Interpol, Punctuation), 'in-intp'),
  240. (r'.|\s', String.Backtick)
  241. ],
  242. 'in-intp': [
  243. (r'\(', Punctuation, '#push'),
  244. (r'\)', Punctuation, '#pop'),
  245. include('root'),
  246. ]
  247. }
  248. def analyse_text(text):
  249. return shebang_matches(text, r'julia')
  250. class JuliaConsoleLexer(Lexer):
  251. """
  252. For Julia console sessions. Modeled after MatlabSessionLexer.
  253. .. versionadded:: 1.6
  254. """
  255. name = 'Julia console'
  256. aliases = ['jlcon']
  257. def get_tokens_unprocessed(self, text):
  258. jllexer = JuliaLexer(**self.options)
  259. start = 0
  260. curcode = ''
  261. insertions = []
  262. output = False
  263. error = False
  264. for line in text.splitlines(True):
  265. if line.startswith('julia>'):
  266. insertions.append((len(curcode), [(0, Generic.Prompt, line[:6])]))
  267. curcode += line[6:]
  268. output = False
  269. error = False
  270. elif line.startswith('help?>') or line.startswith('shell>'):
  271. yield start, Generic.Prompt, line[:6]
  272. yield start + 6, Text, line[6:]
  273. output = False
  274. error = False
  275. elif line.startswith(' ') and not output:
  276. insertions.append((len(curcode), [(0, Text, line[:6])]))
  277. curcode += line[6:]
  278. else:
  279. if curcode:
  280. for item in do_insertions(
  281. insertions, jllexer.get_tokens_unprocessed(curcode)):
  282. yield item
  283. curcode = ''
  284. insertions = []
  285. if line.startswith('ERROR: ') or error:
  286. yield start, Generic.Error, line
  287. error = True
  288. else:
  289. yield start, Generic.Output, line
  290. output = True
  291. start += len(line)
  292. if curcode:
  293. for item in do_insertions(
  294. insertions, jllexer.get_tokens_unprocessed(curcode)):
  295. yield item