cmdline.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. from __future__ import print_function
  2. import sys
  3. import os
  4. import optparse
  5. import cProfile
  6. import inspect
  7. import pkg_resources
  8. import scrapy
  9. from scrapy.crawler import CrawlerProcess
  10. from scrapy.commands import ScrapyCommand
  11. from scrapy.exceptions import UsageError
  12. from scrapy.utils.misc import walk_modules
  13. from scrapy.utils.project import inside_project, get_project_settings
  14. from scrapy.utils.python import garbage_collect
  15. from scrapy.settings.deprecated import check_deprecated_settings
  def _iter_command_classes(module_name):
      """Yield the ScrapyCommand subclasses defined under ``module_name``.

      Every module produced by ``walk_modules(module_name)`` is inspected.
      An object is yielded only when it is a class, subclasses
      ``ScrapyCommand``, was declared in the module being inspected (its
      ``__module__`` matches, so classes merely imported there are skipped)
      and is not ``ScrapyCommand`` itself.
      """
      # TODO: add `name` attribute to commands and merge this function with
      # scrapy.utils.spider.iter_spider_classes
      for mod in walk_modules(module_name):
          for candidate in vars(mod).values():
              if not inspect.isclass(candidate):
                  continue
              if not issubclass(candidate, ScrapyCommand):
                  continue
              if candidate.__module__ != mod.__name__:
                  # Imported from elsewhere, not defined in this module.
                  continue
              if candidate == ScrapyCommand:
                  # The base class is not a runnable command.
                  continue
              yield candidate
  def _get_commands_from_module(module, inproject):
      """Instantiate the commands found under ``module``.

      Returns a dict mapping a command name to a command instance. The name
      is the last dotted component of the ``__module__`` of the command
      class. Classes whose ``requires_project`` is truthy are skipped unless
      ``inproject`` is truthy.
      """
      commands = {}
      for cmd_cls in _iter_command_classes(module):
          if not inproject and cmd_cls.requires_project:
              continue
          name = cmd_cls.__module__.split('.')[-1]
          commands[name] = cmd_cls()
      return commands
  def _get_commands_from_entry_points(inproject, group='scrapy.commands'):
      """Instantiate the commands registered as entry points in ``group``.

      Returns a dict mapping each entry point name to an instance of the
      class it loads. Raises ``Exception`` when an entry point loads
      something that is not a class.

      ``inproject`` is accepted but not used by this function.
      """
      found = {}
      for ep in pkg_resources.iter_entry_points(group):
          loaded = ep.load()
          if not inspect.isclass(loaded):
              raise Exception("Invalid entry point %s" % ep.name)
          found[ep.name] = loaded()
      return found
  def _get_commands_dict(settings, inproject):
      """Collect every available command into one ``{name: instance}`` dict.

      Sources are merged in this order, later ones overriding earlier ones
      on a name clash: the ``scrapy.commands`` package, entry points, and
      finally the module named by ``settings['COMMANDS_MODULE']`` when that
      setting is truthy.
      """
      commands = _get_commands_from_module('scrapy.commands', inproject)
      commands.update(_get_commands_from_entry_points(inproject))
      custom_module = settings['COMMANDS_MODULE']
      if not custom_module:
          return commands
      commands.update(_get_commands_from_module(custom_module, inproject))
      return commands
  49. def _pop_command_name(argv):
  50. i = 0
  51. for arg in argv[1:]:
  52. if not arg.startswith('-'):
  53. del argv[i]
  54. return arg
  55. i += 1
  def _print_header(settings, inproject):
      """Print the one-line Scrapy banner followed by a blank line.

      The banner shows ``scrapy.__version__`` and, when ``inproject`` is
      truthy, ``settings['BOT_NAME']``; otherwise it states that there is no
      active project.
      """
      if inproject:
          banner = "Scrapy %s - project: %s\n" % (
              scrapy.__version__, settings['BOT_NAME'])
      else:
          banner = "Scrapy %s - no active project\n" % scrapy.__version__
      print(banner)
  def _print_commands(settings, inproject):
      """Print the banner, the usage line and the list of available commands.

      Commands are listed in name order with their ``short_desc()``. When
      ``inproject`` is falsy an extra hint says that more commands are
      available from inside a project directory.
      """
      _print_header(settings, inproject)
      print("Usage:")
      print(" scrapy <command> [options] [args]\n")
      print("Available commands:")
      commands = _get_commands_dict(settings, inproject)
      for name in sorted(commands):
          print(" %-13s %s" % (name, commands[name].short_desc()))
      if not inproject:
          print()
          print(" [ more ] More commands available when run from project directory")
      print()
      print('Use "scrapy <command> -h" to see more info about a command')
  def _print_unknown_command(settings, cmdname, inproject):
      """Print the banner and a message saying ``cmdname`` is not a command."""
      _print_header(settings, inproject)
      complaint = "Unknown command: %s\n" % cmdname
      print(complaint)
      hint = 'Use "scrapy" to see available commands'
      print(hint)
  79. def _run_print_help(parser, func, *a, **kw):
  80. try:
  81. func(*a, **kw)
  82. except UsageError as e:
  83. if str(e):
  84. parser.error(str(e))
  85. if e.print_help:
  86. parser.print_help()
  87. sys.exit(2)
  def execute(argv=None, settings=None):
      """Entry point of the ``scrapy`` command line tool.

      ``argv`` defaults to ``sys.argv`` and ``settings`` to the result of
      ``get_project_settings()``. The command name is taken out of ``argv``
      by ``_pop_command_name``; whatever is left in ``argv[1:]`` is parsed
      as that command's options and arguments.

      This function always terminates via ``sys.exit``: status 0 after
      listing the commands when no command name was given, status 2 for an
      unknown command or a usage error, and ``cmd.exitcode`` after the
      command has run.
      """
      if argv is None:
          argv = sys.argv

      if settings is None:
          settings = get_project_settings()
          # set EDITOR from environment if available
          # NOTE(review): placed inside the settings-is-None branch on the
          # assumption that only settings loaded here get the override; confirm.
          if 'EDITOR' in os.environ:
              settings['EDITOR'] = os.environ['EDITOR']
      check_deprecated_settings(settings)

      inproject = inside_project()
      commands = _get_commands_dict(settings, inproject)
      cmdname = _pop_command_name(argv)
      parser = optparse.OptionParser(
          formatter=optparse.TitledHelpFormatter(),
          conflict_handler='resolve',
      )
      if not cmdname:
          _print_commands(settings, inproject)
          sys.exit(0)
      elif cmdname not in commands:
          _print_unknown_command(settings, cmdname, inproject)
          sys.exit(2)

      command = commands[cmdname]
      parser.usage = "scrapy %s %s" % (cmdname, command.syntax())
      parser.description = command.long_desc()
      # Command defaults are applied at 'command' priority before the command
      # gets to see the settings object.
      settings.setdict(command.default_settings, priority='command')
      command.settings = settings
      command.add_options(parser)
      opts, args = parser.parse_args(args=argv[1:])
      _run_print_help(parser, command.process_options, args, opts)

      command.crawler_process = CrawlerProcess(settings)
      _run_print_help(parser, _run_command, command, args, opts)
      sys.exit(command.exitcode)
  123. def _run_command(cmd, args, opts):
  124. if opts.profile:
  125. _run_command_profiled(cmd, args, opts)
  126. else:
  127. cmd.run(args, opts)
  128. def _run_command_profiled(cmd, args, opts):
  129. if opts.profile:
  130. sys.stderr.write("scrapy: writing cProfile stats to %r\n" % opts.profile)
  131. loc = locals()
  132. p = cProfile.Profile()
  133. p.runctx('cmd.run(args, opts)', globals(), loc)
  134. if opts.profile:
  135. p.dump_stats(opts.profile)
  if __name__ == '__main__':
      try:
          execute()
      finally:
          # Twisted reports errors from DebugInfo.__del__, and PyPy does not
          # run gc.collect() on exit, so garbage_collect() is called
          # explicitly before leaving. See:
          # http://doc.pypy.org/en/latest/cpython_differences.html?highlight=gc.collect#differences-related-to-garbage-collection-strategies
          garbage_collect()