12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- """
- Extension for collecting core stats like items scraped and start/finish times
- """
- from datetime import datetime
- from scrapy import signals
class CoreStats(object):
    """Extension that records core crawl stats in a stats collector.

    It stores the start and finish times, the elapsed seconds and the
    finish reason, and keeps counters for scraped items, dropped items
    (overall and per exception class) and received responses.
    """

    def __init__(self, stats):
        """Keep a reference to the stats collector.

        :param stats: object exposing ``set_value`` and ``inc_value``.
        """
        self.stats = stats
        # Unknown until ``spider_opened`` runs.
        self.start_time = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the extension from ``crawler.stats`` and connect handlers.

        :param crawler: object providing ``stats`` and ``signals``.
        :returns: the new extension instance, with its handlers connected
            to the corresponding crawler signals.
        """
        o = cls(crawler.stats)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
        crawler.signals.connect(o.response_received, signal=signals.response_received)
        return o

    def spider_opened(self, spider):
        """Remember the current UTC time and store it as ``start_time``."""
        self.start_time = datetime.utcnow()
        self.stats.set_value('start_time', self.start_time, spider=spider)

    def spider_closed(self, spider, reason):
        """Store the finish time, the finish reason and the elapsed seconds.

        ``elapsed_time_seconds`` is only stored when a start time was
        recorded; ``finish_time`` and ``finish_reason`` are always stored.
        """
        finish_time = datetime.utcnow()
        # ``start_time`` is still None if ``spider_opened`` never ran.
        # Subtracting None would raise TypeError and lose the finish stats,
        # so the elapsed value is skipped in that case.
        if self.start_time is not None:
            elapsed_time = finish_time - self.start_time
            elapsed_time_seconds = elapsed_time.total_seconds()
            self.stats.set_value('elapsed_time_seconds', elapsed_time_seconds, spider=spider)
        self.stats.set_value('finish_time', finish_time, spider=spider)
        self.stats.set_value('finish_reason', reason, spider=spider)

    def item_scraped(self, item, spider):
        """Increment the ``item_scraped_count`` counter."""
        self.stats.inc_value('item_scraped_count', spider=spider)

    def response_received(self, spider):
        """Increment the ``response_received_count`` counter."""
        self.stats.inc_value('response_received_count', spider=spider)

    def item_dropped(self, item, spider, exception):
        """Increment the dropped-item counters.

        Both the overall ``item_dropped_count`` and a per-reason counter
        are incremented; the reason is the class name of ``exception``.
        """
        reason = exception.__class__.__name__
        self.stats.inc_value('item_dropped_count', spider=spider)
        self.stats.inc_value('item_dropped_reasons_count/%s' % reason, spider=spider)
|