cmdline.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.cmdline
  4. ~~~~~~~~~~~~~~~~
  5. Command line interface.
  6. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. from __future__ import print_function
  10. import sys
  11. import getopt
  12. from textwrap import dedent
  13. from pygments import __version__, highlight
  14. from pygments.util import ClassNotFound, OptionError, docstring_headline, \
  15. guess_decode, guess_decode_from_terminal, terminal_encoding
  16. from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
  17. load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
  18. from pygments.lexers.special import TextLexer
  19. from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
  20. from pygments.formatters import get_all_formatters, get_formatter_by_name, \
  21. load_formatter_from_file, get_formatter_for_filename, find_formatter_class
  22. from pygments.formatters.terminal import TerminalFormatter
  23. from pygments.filters import get_all_filters, find_filter_class
  24. from pygments.styles import get_all_styles, get_style_by_name
  25. USAGE = """\
  26. Usage: %s [-l <lexer> | -g] [-F <filter>[:<options>]] [-f <formatter>]
  27. [-O <options>] [-P <option=value>] [-s] [-v] [-x] [-o <outfile>] [<infile>]
  28. %s -S <style> -f <formatter> [-a <arg>] [-O <options>] [-P <option=value>]
  29. %s -L [<which> ...]
  30. %s -N <filename>
  31. %s -H <type> <name>
  32. %s -h | -V
  33. Highlight the input file and write the result to <outfile>.
  34. If no input file is given, use stdin, if -o is not given, use stdout.
  35. If -s is passed, lexing will be done in "streaming" mode, reading and
  36. highlighting one line at a time. This will only work properly with
  37. lexers that have no constructs spanning multiple lines!
  38. <lexer> is a lexer name (query all lexer names with -L). If -l is not
  39. given, the lexer is guessed from the extension of the input file name
  40. (this obviously doesn't work if the input is stdin). If -g is passed,
  41. attempt to guess the lexer from the file contents, or pass through as
  42. plain text if this fails (this can work for stdin).
  43. Likewise, <formatter> is a formatter name, and will be guessed from
  44. the extension of the output file name. If no output file is given,
  45. the terminal formatter will be used by default.
  46. The additional option -x allows custom lexers and formatters to be
  47. loaded from a .py file relative to the current working directory. For
  48. example, ``-l ./customlexer.py -x``. By default, this option expects a
  49. file with a class named CustomLexer or CustomFormatter; you can also
  50. specify your own class name with a colon (``-l ./lexer.py:MyLexer``).
  51. Users should be very careful not to use this option with untrusted files,
  52. because it will import and run them.
  53. With the -O option, you can give the lexer and formatter a comma-
  54. separated list of options, e.g. ``-O bg=light,python=cool``.
  55. The -P option adds lexer and formatter options like the -O option, but
  56. you can only give one option per -P. That way, the option value may
  57. contain commas and equals signs, which it can't with -O, e.g.
  58. ``-P "heading=Pygments, the Python highlighter".
  59. With the -F option, you can add filters to the token stream, you can
  60. give options in the same way as for -O after a colon (note: there must
  61. not be spaces around the colon).
  62. The -O, -P and -F options can be given multiple times.
  63. With the -S option, print out style definitions for style <style>
  64. for formatter <formatter>. The argument given by -a is formatter
  65. dependent.
  66. The -L option lists lexers, formatters, styles or filters -- set
  67. `which` to the thing you want to list (e.g. "styles"), or omit it to
  68. list everything.
  69. The -N option guesses and prints out a lexer name based solely on
  70. the given filename. It does not take input or highlight anything.
  71. If no specific lexer can be determined "text" is returned.
  72. The -H option prints detailed help for the object <name> of type <type>,
  73. where <type> is one of "lexer", "formatter" or "filter".
  74. The -s option processes lines one at a time until EOF, rather than
  75. waiting to process the entire file. This only works for stdin, and
  76. is intended for streaming input such as you get from 'tail -f'.
  77. Example usage: "tail -f sql.log | pygmentize -s -l sql"
  78. The -v option prints a detailed traceback on unhandled exceptions,
  79. which is useful for debugging and bug reports.
  80. The -h option prints this help.
  81. The -V option prints the package version.
  82. """
  83. def _parse_options(o_strs):
  84. opts = {}
  85. if not o_strs:
  86. return opts
  87. for o_str in o_strs:
  88. if not o_str.strip():
  89. continue
  90. o_args = o_str.split(',')
  91. for o_arg in o_args:
  92. o_arg = o_arg.strip()
  93. try:
  94. o_key, o_val = o_arg.split('=', 1)
  95. o_key = o_key.strip()
  96. o_val = o_val.strip()
  97. except ValueError:
  98. opts[o_arg] = True
  99. else:
  100. opts[o_key] = o_val
  101. return opts
  102. def _parse_filters(f_strs):
  103. filters = []
  104. if not f_strs:
  105. return filters
  106. for f_str in f_strs:
  107. if ':' in f_str:
  108. fname, fopts = f_str.split(':', 1)
  109. filters.append((fname, _parse_options([fopts])))
  110. else:
  111. filters.append((f_str, {}))
  112. return filters
  113. def _print_help(what, name):
  114. try:
  115. if what == 'lexer':
  116. cls = get_lexer_by_name(name)
  117. print("Help on the %s lexer:" % cls.name)
  118. print(dedent(cls.__doc__))
  119. elif what == 'formatter':
  120. cls = find_formatter_class(name)
  121. print("Help on the %s formatter:" % cls.name)
  122. print(dedent(cls.__doc__))
  123. elif what == 'filter':
  124. cls = find_filter_class(name)
  125. print("Help on the %s filter:" % name)
  126. print(dedent(cls.__doc__))
  127. return 0
  128. except (AttributeError, ValueError):
  129. print("%s not found!" % what, file=sys.stderr)
  130. return 1
  131. def _print_list(what):
  132. if what == 'lexer':
  133. print()
  134. print("Lexers:")
  135. print("~~~~~~~")
  136. info = []
  137. for fullname, names, exts, _ in get_all_lexers():
  138. tup = (', '.join(names)+':', fullname,
  139. exts and '(filenames ' + ', '.join(exts) + ')' or '')
  140. info.append(tup)
  141. info.sort()
  142. for i in info:
  143. print(('* %s\n %s %s') % i)
  144. elif what == 'formatter':
  145. print()
  146. print("Formatters:")
  147. print("~~~~~~~~~~~")
  148. info = []
  149. for cls in get_all_formatters():
  150. doc = docstring_headline(cls)
  151. tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
  152. '(filenames ' + ', '.join(cls.filenames) + ')' or '')
  153. info.append(tup)
  154. info.sort()
  155. for i in info:
  156. print(('* %s\n %s %s') % i)
  157. elif what == 'filter':
  158. print()
  159. print("Filters:")
  160. print("~~~~~~~~")
  161. for name in get_all_filters():
  162. cls = find_filter_class(name)
  163. print("* " + name + ':')
  164. print(" %s" % docstring_headline(cls))
  165. elif what == 'style':
  166. print()
  167. print("Styles:")
  168. print("~~~~~~~")
  169. for name in get_all_styles():
  170. cls = get_style_by_name(name)
  171. print("* " + name + ':')
  172. print(" %s" % docstring_headline(cls))
  173. def main_inner(popts, args, usage):
  174. opts = {}
  175. O_opts = []
  176. P_opts = []
  177. F_opts = []
  178. for opt, arg in popts:
  179. if opt == '-O':
  180. O_opts.append(arg)
  181. elif opt == '-P':
  182. P_opts.append(arg)
  183. elif opt == '-F':
  184. F_opts.append(arg)
  185. opts[opt] = arg
  186. if opts.pop('-h', None) is not None:
  187. print(usage)
  188. return 0
  189. if opts.pop('-V', None) is not None:
  190. print('Pygments version %s, (c) 2006-2017 by Georg Brandl.' % __version__)
  191. return 0
  192. # handle ``pygmentize -L``
  193. L_opt = opts.pop('-L', None)
  194. if L_opt is not None:
  195. if opts:
  196. print(usage, file=sys.stderr)
  197. return 2
  198. # print version
  199. main(['', '-V'])
  200. if not args:
  201. args = ['lexer', 'formatter', 'filter', 'style']
  202. for arg in args:
  203. _print_list(arg.rstrip('s'))
  204. return 0
  205. # handle ``pygmentize -H``
  206. H_opt = opts.pop('-H', None)
  207. if H_opt is not None:
  208. if opts or len(args) != 2:
  209. print(usage, file=sys.stderr)
  210. return 2
  211. what, name = args # pylint: disable=unbalanced-tuple-unpacking
  212. if what not in ('lexer', 'formatter', 'filter'):
  213. print(usage, file=sys.stderr)
  214. return 2
  215. return _print_help(what, name)
  216. # parse -O options
  217. parsed_opts = _parse_options(O_opts)
  218. opts.pop('-O', None)
  219. # parse -P options
  220. for p_opt in P_opts:
  221. try:
  222. name, value = p_opt.split('=', 1)
  223. except ValueError:
  224. parsed_opts[p_opt] = True
  225. else:
  226. parsed_opts[name] = value
  227. opts.pop('-P', None)
  228. # encodings
  229. inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
  230. outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
  231. # handle ``pygmentize -N``
  232. infn = opts.pop('-N', None)
  233. if infn is not None:
  234. lexer = find_lexer_class_for_filename(infn)
  235. if lexer is None:
  236. lexer = TextLexer
  237. print(lexer.aliases[0])
  238. return 0
  239. # handle ``pygmentize -S``
  240. S_opt = opts.pop('-S', None)
  241. a_opt = opts.pop('-a', None)
  242. if S_opt is not None:
  243. f_opt = opts.pop('-f', None)
  244. if not f_opt:
  245. print(usage, file=sys.stderr)
  246. return 2
  247. if opts or args:
  248. print(usage, file=sys.stderr)
  249. return 2
  250. try:
  251. parsed_opts['style'] = S_opt
  252. fmter = get_formatter_by_name(f_opt, **parsed_opts)
  253. except ClassNotFound as err:
  254. print(err, file=sys.stderr)
  255. return 1
  256. print(fmter.get_style_defs(a_opt or ''))
  257. return 0
  258. # if no -S is given, -a is not allowed
  259. if a_opt is not None:
  260. print(usage, file=sys.stderr)
  261. return 2
  262. # parse -F options
  263. F_opts = _parse_filters(F_opts)
  264. opts.pop('-F', None)
  265. allow_custom_lexer_formatter = False
  266. # -x: allow custom (eXternal) lexers and formatters
  267. if opts.pop('-x', None) is not None:
  268. allow_custom_lexer_formatter = True
  269. # select lexer
  270. lexer = None
  271. # given by name?
  272. lexername = opts.pop('-l', None)
  273. if lexername:
  274. # custom lexer, located relative to user's cwd
  275. if allow_custom_lexer_formatter and '.py' in lexername:
  276. try:
  277. if ':' in lexername:
  278. filename, name = lexername.rsplit(':', 1)
  279. lexer = load_lexer_from_file(filename, name,
  280. **parsed_opts)
  281. else:
  282. lexer = load_lexer_from_file(lexername, **parsed_opts)
  283. except ClassNotFound as err:
  284. print('Error:', err, file=sys.stderr)
  285. return 1
  286. else:
  287. try:
  288. lexer = get_lexer_by_name(lexername, **parsed_opts)
  289. except (OptionError, ClassNotFound) as err:
  290. print('Error:', err, file=sys.stderr)
  291. return 1
  292. # read input code
  293. code = None
  294. if args:
  295. if len(args) > 1:
  296. print(usage, file=sys.stderr)
  297. return 2
  298. if '-s' in opts:
  299. print('Error: -s option not usable when input file specified',
  300. file=sys.stderr)
  301. return 2
  302. infn = args[0]
  303. try:
  304. with open(infn, 'rb') as infp:
  305. code = infp.read()
  306. except Exception as err:
  307. print('Error: cannot read infile:', err, file=sys.stderr)
  308. return 1
  309. if not inencoding:
  310. code, inencoding = guess_decode(code)
  311. # do we have to guess the lexer?
  312. if not lexer:
  313. try:
  314. lexer = get_lexer_for_filename(infn, code, **parsed_opts)
  315. except ClassNotFound as err:
  316. if '-g' in opts:
  317. try:
  318. lexer = guess_lexer(code, **parsed_opts)
  319. except ClassNotFound:
  320. lexer = TextLexer(**parsed_opts)
  321. else:
  322. print('Error:', err, file=sys.stderr)
  323. return 1
  324. except OptionError as err:
  325. print('Error:', err, file=sys.stderr)
  326. return 1
  327. elif '-s' not in opts: # treat stdin as full file (-s support is later)
  328. # read code from terminal, always in binary mode since we want to
  329. # decode ourselves and be tolerant with it
  330. if sys.version_info > (3,):
  331. # Python 3: we have to use .buffer to get a binary stream
  332. code = sys.stdin.buffer.read()
  333. else:
  334. code = sys.stdin.read()
  335. if not inencoding:
  336. code, inencoding = guess_decode_from_terminal(code, sys.stdin)
  337. # else the lexer will do the decoding
  338. if not lexer:
  339. try:
  340. lexer = guess_lexer(code, **parsed_opts)
  341. except ClassNotFound:
  342. lexer = TextLexer(**parsed_opts)
  343. else: # -s option needs a lexer with -l
  344. if not lexer:
  345. print('Error: when using -s a lexer has to be selected with -l',
  346. file=sys.stderr)
  347. return 2
  348. # process filters
  349. for fname, fopts in F_opts:
  350. try:
  351. lexer.add_filter(fname, **fopts)
  352. except ClassNotFound as err:
  353. print('Error:', err, file=sys.stderr)
  354. return 1
  355. # select formatter
  356. outfn = opts.pop('-o', None)
  357. fmter = opts.pop('-f', None)
  358. if fmter:
  359. # custom formatter, located relative to user's cwd
  360. if allow_custom_lexer_formatter and '.py' in fmter:
  361. try:
  362. if ':' in fmter:
  363. file, fmtername = fmter.rsplit(':', 1)
  364. fmter = load_formatter_from_file(file, fmtername,
  365. **parsed_opts)
  366. else:
  367. fmter = load_formatter_from_file(fmter, **parsed_opts)
  368. except ClassNotFound as err:
  369. print('Error:', err, file=sys.stderr)
  370. return 1
  371. else:
  372. try:
  373. fmter = get_formatter_by_name(fmter, **parsed_opts)
  374. except (OptionError, ClassNotFound) as err:
  375. print('Error:', err, file=sys.stderr)
  376. return 1
  377. if outfn:
  378. if not fmter:
  379. try:
  380. fmter = get_formatter_for_filename(outfn, **parsed_opts)
  381. except (OptionError, ClassNotFound) as err:
  382. print('Error:', err, file=sys.stderr)
  383. return 1
  384. try:
  385. outfile = open(outfn, 'wb')
  386. except Exception as err:
  387. print('Error: cannot open outfile:', err, file=sys.stderr)
  388. return 1
  389. else:
  390. if not fmter:
  391. fmter = TerminalFormatter(**parsed_opts)
  392. if sys.version_info > (3,):
  393. # Python 3: we have to use .buffer to get a binary stream
  394. outfile = sys.stdout.buffer
  395. else:
  396. outfile = sys.stdout
  397. # determine output encoding if not explicitly selected
  398. if not outencoding:
  399. if outfn:
  400. # output file? use lexer encoding for now (can still be None)
  401. fmter.encoding = inencoding
  402. else:
  403. # else use terminal encoding
  404. fmter.encoding = terminal_encoding(sys.stdout)
  405. # provide coloring under Windows, if possible
  406. if not outfn and sys.platform in ('win32', 'cygwin') and \
  407. fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover
  408. # unfortunately colorama doesn't support binary streams on Py3
  409. if sys.version_info > (3,):
  410. from pygments.util import UnclosingTextIOWrapper
  411. outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
  412. fmter.encoding = None
  413. try:
  414. import colorama.initialise
  415. except ImportError:
  416. pass
  417. else:
  418. outfile = colorama.initialise.wrap_stream(
  419. outfile, convert=None, strip=None, autoreset=False, wrap=True)
  420. # When using the LaTeX formatter and the option `escapeinside` is
  421. # specified, we need a special lexer which collects escaped text
  422. # before running the chosen language lexer.
  423. escapeinside = parsed_opts.get('escapeinside', '')
  424. if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
  425. left = escapeinside[0]
  426. right = escapeinside[1]
  427. lexer = LatexEmbeddedLexer(left, right, lexer)
  428. # ... and do it!
  429. if '-s' not in opts:
  430. # process whole input as per normal...
  431. highlight(code, lexer, fmter, outfile)
  432. return 0
  433. else:
  434. # line by line processing of stdin (eg: for 'tail -f')...
  435. try:
  436. while 1:
  437. if sys.version_info > (3,):
  438. # Python 3: we have to use .buffer to get a binary stream
  439. line = sys.stdin.buffer.readline()
  440. else:
  441. line = sys.stdin.readline()
  442. if not line:
  443. break
  444. if not inencoding:
  445. line = guess_decode_from_terminal(line, sys.stdin)[0]
  446. highlight(line, lexer, fmter, outfile)
  447. if hasattr(outfile, 'flush'):
  448. outfile.flush()
  449. return 0
  450. except KeyboardInterrupt: # pragma: no cover
  451. return 0
  452. def main(args=sys.argv):
  453. """
  454. Main command line entry point.
  455. """
  456. usage = USAGE % ((args[0],) * 6)
  457. try:
  458. popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:vhVHgsx")
  459. except getopt.GetoptError:
  460. print(usage, file=sys.stderr)
  461. return 2
  462. try:
  463. return main_inner(popts, args, usage)
  464. except Exception:
  465. if '-v' in dict(popts):
  466. print(file=sys.stderr)
  467. print('*' * 65, file=sys.stderr)
  468. print('An unhandled exception occurred while highlighting.',
  469. file=sys.stderr)
  470. print('Please report the whole traceback to the issue tracker at',
  471. file=sys.stderr)
  472. print('<https://bitbucket.org/birkenfeld/pygments-main/issues>.',
  473. file=sys.stderr)
  474. print('*' * 65, file=sys.stderr)
  475. print(file=sys.stderr)
  476. raise
  477. import traceback
  478. info = traceback.format_exception(*sys.exc_info())
  479. msg = info[-1].strip()
  480. if len(info) >= 3:
  481. # extract relevant file and position info
  482. msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:]
  483. print(file=sys.stderr)
  484. print('*** Error while highlighting:', file=sys.stderr)
  485. print(msg, file=sys.stderr)
  486. print('*** If this is a bug you want to report, please rerun with -v.',
  487. file=sys.stderr)
  488. return 1