123456789101112131415161718192021 |
- # -*- coding: utf-8 -*-
- import scrapy
- from scrapy.linkextractors import LinkExtractor
- from scrapy.spiders import CrawlSpider, Rule
- class $classname(CrawlSpider):
- name = '$name'
- allowed_domains = ['$domain']
- start_urls = ['http://$domain/']
- rules = (
- Rule(LinkExtractor(allow=r'Items/'), callback='parse_item', follow=True),
- )
- def parse_item(self, response):
- item = {}
- #item['domain_id'] = response.xpath('//input[@id="sid"]/@value').get()
- #item['name'] = response.xpath('//div[@id="name"]').get()
- #item['description'] = response.xpath('//div[@id="description"]').get()
- return item
|