crawl.tmpl 657 B

123456789101112131415161718192021
# -*- coding: utf-8 -*-
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
  5. class $classname(CrawlSpider):
  6. name = '$name'
  7. allowed_domains = ['$domain']
  8. start_urls = ['http://$domain/']
  9. rules = (
  10. Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
  11. )
  12. def parse_item(self, response):
  13. item = {}
  14. #item['domain_id'] = response.xpath('//input[@id="sid"]/@value').get()
  15. #item['name'] = response.xpath('//div[@id="name"]').get()
  16. #item['description'] = response.xpath('//div[@id="description"]').get()
  17. return item