# statscollectors.py (2.1 KB)
  1. """
  2. Scrapy extension for collecting scraping stats
  3. """
  4. import pprint
  5. import logging
  6. logger = logging.getLogger(__name__)
  7. class StatsCollector(object):
  8. def __init__(self, crawler):
  9. self._dump = crawler.settings.getbool('STATS_DUMP')
  10. self._stats = {}
  11. def get_value(self, key, default=None, spider=None):
  12. return self._stats.get(key, default)
  13. def get_stats(self, spider=None):
  14. return self._stats
  15. def set_value(self, key, value, spider=None):
  16. self._stats[key] = value
  17. def set_stats(self, stats, spider=None):
  18. self._stats = stats
  19. def inc_value(self, key, count=1, start=0, spider=None):
  20. d = self._stats
  21. d[key] = d.setdefault(key, start) + count
  22. def max_value(self, key, value, spider=None):
  23. self._stats[key] = max(self._stats.setdefault(key, value), value)
  24. def min_value(self, key, value, spider=None):
  25. self._stats[key] = min(self._stats.setdefault(key, value), value)
  26. def clear_stats(self, spider=None):
  27. self._stats.clear()
  28. def open_spider(self, spider):
  29. pass
  30. def close_spider(self, spider, reason):
  31. if self._dump:
  32. logger.info("Dumping Scrapy stats:\n" + pprint.pformat(self._stats),
  33. extra={'spider': spider})
  34. self._persist_stats(self._stats, spider)
  35. def _persist_stats(self, stats, spider):
  36. pass
  37. class MemoryStatsCollector(StatsCollector):
  38. def __init__(self, crawler):
  39. super(MemoryStatsCollector, self).__init__(crawler)
  40. self.spider_stats = {}
  41. def _persist_stats(self, stats, spider):
  42. self.spider_stats[spider.name] = stats
  43. class DummyStatsCollector(StatsCollector):
  44. def get_value(self, key, default=None, spider=None):
  45. return default
  46. def set_value(self, key, value, spider=None):
  47. pass
  48. def set_stats(self, stats, spider=None):
  49. pass
  50. def inc_value(self, key, count=1, start=0, spider=None):
  51. pass
  52. def max_value(self, key, value, spider=None):
  53. pass
  54. def min_value(self, key, value, spider=None):
  55. pass