startproject.py

from __future__ import print_function
import re
import os
import string
from importlib import import_module
from os.path import join, exists, abspath
from shutil import ignore_patterns, move, copy2, copystat

import scrapy
from scrapy.commands import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.exceptions import UsageError


TEMPLATES_TO_RENDER = (
    ('scrapy.cfg',),
    ('${project_name}', 'settings.py.tmpl'),
    ('${project_name}', 'items.py.tmpl'),
    ('${project_name}', 'pipelines.py.tmpl'),
    ('${project_name}', 'middlewares.py.tmpl'),
)

IGNORE = ignore_patterns('*.pyc', '.svn')


class Command(ScrapyCommand):

    requires_project = False
    default_settings = {'LOG_ENABLED': False,
                        'SPIDER_LOADER_WARN_ONLY': True}

    def syntax(self):
        return "<project_name> [project_dir]"

    def short_desc(self):
        return "Create new project"

    def _is_valid_name(self, project_name):
        def _module_exists(module_name):
            try:
                import_module(module_name)
                return True
            except ImportError:
                return False

        if not re.search(r'^[_a-zA-Z]\w*$', project_name):
            print('Error: Project names must begin with a letter and contain'
                  ' only\nletters, numbers and underscores')
        elif _module_exists(project_name):
            print('Error: Module %r already exists' % project_name)
        else:
            return True
        return False

    def _copytree(self, src, dst):
        """
        shutil.copytree() always creates the destination directory itself
        and fails when it already exists, so this reduced copy of it is
        used instead: it only creates the directory when it is missing.

        More info at:
        https://github.com/scrapy/scrapy/pull/2005
        """
        ignore = IGNORE
        names = os.listdir(src)
        ignored_names = ignore(src, names)

        if not os.path.exists(dst):
            os.makedirs(dst)

        for name in names:
            if name in ignored_names:
                continue

            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            if os.path.isdir(srcname):
                self._copytree(srcname, dstname)
            else:
                copy2(srcname, dstname)
        copystat(src, dst)

    def run(self, args, opts):
        if len(args) not in (1, 2):
            raise UsageError()

        project_name = args[0]
        project_dir = args[0]  # the target directory defaults to the project name

        if len(args) == 2:
            project_dir = args[1]

        if exists(join(project_dir, 'scrapy.cfg')):
            self.exitcode = 1
            print('Error: scrapy.cfg already exists in %s' % abspath(project_dir))
            return

        if not self._is_valid_name(project_name):
            self.exitcode = 1
            return

        self._copytree(self.templates_dir, abspath(project_dir))
        # the project template ships a generic 'module' package;
        # rename it after the new project
        move(join(project_dir, 'module'), join(project_dir, project_name))
        for paths in TEMPLATES_TO_RENDER:
            path = join(*paths)
            tplfile = join(project_dir,
                           string.Template(path).substitute(project_name=project_name))
            render_templatefile(tplfile, project_name=project_name,
                                ProjectName=string_camelcase(project_name))
        print("New Scrapy project '%s', using template directory '%s', "
              "created in:" % (project_name, self.templates_dir))
        print("    %s\n" % abspath(project_dir))
        print("You can start your first spider with:")
        print("    cd %s" % project_dir)
        print("    scrapy genspider example example.com")

    @property
    def templates_dir(self):
        _templates_base_dir = self.settings['TEMPLATES_DIR'] or \
            join(scrapy.__path__[0], 'templates')
        return join(_templates_base_dir, 'project')
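
The docstring of _copytree() refers to the fact that the stdlib
shutil.copytree() insists on creating the destination directory itself and
fails when that directory already exists, which is exactly the case
startproject needs to support (see the PR linked above). A rough sketch of
that stdlib behaviour, assuming a destination that already exists:

    import shutil
    import tempfile

    src = tempfile.mkdtemp()
    dst = tempfile.mkdtemp()  # the destination directory already exists

    try:
        shutil.copytree(src, dst)  # stdlib copytree refuses an existing dst
    except OSError as exc:
        print('copytree failed:', exc)

Newer Python versions add a dirs_exist_ok flag to shutil.copytree(), but the
reduced _copytree() above keeps the command independent of it.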
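
To illustrate the path expansion performed in run(), here is a minimal,
standalone sketch of the same string.Template substitution over
TEMPLATES_TO_RENDER entries, using only the standard library; the project
name 'tutorial' is just an example:

    import string
    from os.path import join

    project_name = 'tutorial'
    project_dir = 'tutorial'  # defaults to the project name

    for paths in (('scrapy.cfg',),
                  ('${project_name}', 'settings.py.tmpl')):
        path = join(*paths)
        # fill in the ${project_name} placeholder, if the path has one
        print(join(project_dir,
                   string.Template(path).substitute(project_name=project_name)))

    # On POSIX paths this prints:
    #   tutorial/scrapy.cfg
    #   tutorial/tutorial/settings.py.tmpl

render_templatefile() then substitutes the project_name / ProjectName
variables inside each of those template files.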
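
Usage note: this file implements the "scrapy startproject" command, so a
typical invocation (using the arbitrary example name 'tutorial', and following
the hints printed by run() above) is:

    scrapy startproject tutorial
    cd tutorial
    scrapy genspider example example.com

A second argument, as in "scrapy startproject tutorial path/to/dir", overrides
the target directory, matching the "<project_name> [project_dir]" syntax.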