123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128 |
- from __future__ import print_function
- import os
- import shutil
- import string
- from importlib import import_module
- from os.path import join, dirname, abspath, exists, splitext
- import scrapy
- from scrapy.commands import ScrapyCommand
- from scrapy.utils.template import render_templatefile, string_camelcase
- from scrapy.exceptions import UsageError
def sanitize_module_name(module_name):
    """Sanitize the given module name, by replacing dashes and points
    with underscores and prefixing it with a letter if it doesn't start
    with one.

    An empty name does not start with a letter either, so it is turned
    into ``"a"`` instead of raising ``IndexError``.
    """
    module_name = module_name.replace('-', '_').replace('.', '_')
    # Check emptiness first: indexing [0] on an empty string would raise.
    if not module_name or module_name[0] not in string.ascii_letters:
        module_name = "a" + module_name
    return module_name
class Command(ScrapyCommand):
    """``genspider`` command: create a spider module from a template.

    Besides generating a spider, the command can list the available
    templates (``--list``) or print one of them (``--dump``).
    """

    # Class-level configuration read by the command framework (defined
    # outside this file).
    requires_project = False
    default_settings = {'LOG_ENABLED': False}

    def syntax(self):
        """Return the argument synopsis of the command."""
        return "[options] <name> <domain>"

    def short_desc(self):
        """Return a one-line description of the command."""
        return "Generate new spider using pre-defined templates"

    def add_options(self, parser):
        """Register the command-specific options on ``parser``."""
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-l", "--list", dest="list", action="store_true",
            help="List available templates")
        parser.add_option("-e", "--edit", dest="edit", action="store_true",
            help="Edit spider after creating it")
        parser.add_option("-d", "--dump", dest="dump", metavar="TEMPLATE",
            help="Dump template to standard output")
        parser.add_option("-t", "--template", dest="template", default="basic",
            help="Uses a custom template.")
        parser.add_option("--force", dest="force", action="store_true",
            help="If the spider already exists, overwrite it with the template")

    def run(self, args, opts):
        """Execute the command.

        ``--list`` and ``--dump`` are handled first and return early.
        Otherwise exactly two positional arguments (spider name and domain)
        are required, else ``UsageError`` is raised. Generation is skipped
        when the sanitized name equals the ``BOT_NAME`` setting, or when a
        spider with that name is already loadable and ``--force`` was not
        given.
        """
        if opts.list:
            self._list_templates()
            return
        if opts.dump:
            template_file = self._find_template(opts.dump)
            if template_file:
                with open(template_file, "r") as f:
                    print(f.read())
            return
        if len(args) != 2:
            raise UsageError()

        name, domain = args
        module = sanitize_module_name(name)

        if self.settings.get('BOT_NAME') == module:
            print("Cannot create a spider with the same name as your project")
            return

        try:
            spidercls = self.crawler_process.spider_loader.load(name)
        except KeyError:
            # No spider registered under this name yet: nothing to protect.
            pass
        else:
            # if spider already exists and not --force then halt
            if not opts.force:
                print("Spider %r already exists in module:" % name)
                print(" %s" % spidercls.__module__)
                return

        template_file = self._find_template(opts.template)
        if template_file:
            self._genspider(module, name, domain, opts.template, template_file)
            if opts.edit:
                # Function-scope import keeps this block self-contained.
                import subprocess
                # Pass the name as a separate argument instead of splicing
                # it into a shell string: quotes or shell metacharacters in
                # the name can no longer break or alter the command. Unlike
                # os.system, subprocess.call returns the child's exit code
                # directly rather than an encoded wait status.
                try:
                    self.exitcode = subprocess.call(['scrapy', 'edit', name])
                except OSError as exc:
                    # The executable could not be started (e.g. not on
                    # PATH); report it instead of raising.
                    print("Unable to run 'scrapy edit': %s" % exc)
                    self.exitcode = 1

    def _genspider(self, module, name, domain, template_name, template_file):
        """Generate the spider module, based on the given template.

        The template is copied to ``<module>.py`` and rendered in place.
        The target directory is the one containing the module named by the
        ``NEWSPIDER_MODULE`` setting, or the current directory when that
        setting is empty.
        """
        tvars = {
            'project_name': self.settings.get('BOT_NAME'),
            'ProjectName': string_camelcase(self.settings.get('BOT_NAME')),
            'module': module,
            'name': name,
            'domain': domain,
            'classname': '%sSpider' % ''.join(
                s.capitalize() for s in module.split('_')),
        }
        if self.settings.get('NEWSPIDER_MODULE'):
            spiders_module = import_module(self.settings['NEWSPIDER_MODULE'])
            spiders_dir = abspath(dirname(spiders_module.__file__))
        else:
            spiders_module = None
            spiders_dir = "."
        spider_file = "%s.py" % join(spiders_dir, module)
        shutil.copyfile(template_file, spider_file)
        render_templatefile(spider_file, **tvars)
        # When a spiders module is known the message continues on the same
        # line with the "in module:" part printed below.
        print("Created spider %r using template %r " % (name, template_name),
              end=('' if spiders_module else '\n'))
        if spiders_module:
            print("in module:\n %s.%s" % (spiders_module.__name__, module))

    def _find_template(self, template):
        """Return the path of ``<template>.tmpl`` in the templates directory.

        When the file does not exist, a hint is printed and ``None`` is
        returned.
        """
        template_file = join(self.templates_dir, '%s.tmpl' % template)
        if exists(template_file):
            return template_file
        print("Unable to find template: %s\n" % template)
        print('Use "scrapy genspider --list" to see all available templates.')
        return None

    def _list_templates(self):
        """Print the names of all ``.tmpl`` files in the templates directory."""
        print("Available templates:")
        for filename in sorted(os.listdir(self.templates_dir)):
            if filename.endswith('.tmpl'):
                print(" %s" % splitext(filename)[0])

    @property
    def templates_dir(self):
        """Directory that holds the spider templates.

        This is the ``spiders`` subdirectory of the ``TEMPLATES_DIR``
        setting, or of the ``templates`` directory inside the scrapy package
        when the setting is empty.
        """
        _templates_base_dir = (self.settings['TEMPLATES_DIR']
                               or join(scrapy.__path__[0], 'templates'))
        return join(_templates_base_dir, 'spiders')
|