corestats.py

  1. """
  2. Extension for collecting core stats like items scraped and start/finish times
  3. """
  4. from datetime import datetime
  5. from scrapy import signals
  6. class CoreStats(object):
  7. def __init__(self, stats):
  8. self.stats = stats
  9. self.start_time = None
  10. @classmethod
  11. def from_crawler(cls, crawler):
  12. o = cls(crawler.stats)
  13. crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
  14. crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
  15. crawler.signals.connect(o.item_scraped, signal=signals.item_scraped)
  16. crawler.signals.connect(o.item_dropped, signal=signals.item_dropped)
  17. crawler.signals.connect(o.response_received, signal=signals.response_received)
  18. return o
  19. def spider_opened(self, spider):
  20. self.start_time = datetime.utcnow()
  21. self.stats.set_value('start_time', self.start_time, spider=spider)
  22. def spider_closed(self, spider, reason):
  23. finish_time = datetime.utcnow()
  24. elapsed_time = finish_time - self.start_time
  25. elapsed_time_seconds = elapsed_time.total_seconds()
  26. self.stats.set_value('elapsed_time_seconds', elapsed_time_seconds, spider=spider)
  27. self.stats.set_value('finish_time', finish_time, spider=spider)
  28. self.stats.set_value('finish_reason', reason, spider=spider)
  29. def item_scraped(self, item, spider):
  30. self.stats.inc_value('item_scraped_count', spider=spider)
  31. def response_received(self, spider):
  32. self.stats.inc_value('response_received_count', spider=spider)
  33. def item_dropped(self, item, spider, exception):
  34. reason = exception.__class__.__name__
  35. self.stats.inc_value('item_dropped_count', spider=spider)
  36. self.stats.inc_value('item_dropped_reasons_count/%s' % reason, spider=spider)
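
This is the extension pattern Scrapy itself uses: a from_crawler factory that receives the crawler, keeps a reference to its stats collector, and wires handler methods to crawler signals. For reference, a minimal sketch of how an extension like this is registered and how its stats are read back; the module path myproject.extensions is a hypothetical placeholder (the built-in CoreStats is already enabled by default):

# settings.py -- register an extension following the pattern above.
# 'myproject.extensions.CoreStats' is a hypothetical project-local path.
EXTENSIONS = {
    'myproject.extensions.CoreStats': 500,
}

# The collected values can later be read back from the same stats
# collector, e.g. from inside a spider:
#   self.crawler.stats.get_value('item_scraped_count')
#   self.crawler.stats.get_value('elapsed_time_seconds')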